Sending Data to Kafka over HTTP

    In an end-to-end big-data pipeline, how the data flows is an important question. This post records how data travels from HTTP into Kafka.

    Technologies used:

    1. Java's Vert.x framework

      (For Vert.x usage examples, see: http://www.programcreek.com/java-api-examples/index.php?api=io.vertx.core.Vertx; a minimal server sketch follows this list.)

    2. Kafka's KafkaProducer client

      (For usage examples, see: http://www.programcreek.com/java-api-examples/index.php?api=org.apache.kafka.clients.producer.KafkaProducer)
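
    Before diving into the full listings below, here is a minimal Vert.x HTTP server in the style this project uses. This is only an orientation sketch against the Vert.x 3.x API; the /echo route and port 8080 are illustrative, not part of the project:

    package com.donews.data.example;

    import io.vertx.core.Vertx;
    import io.vertx.ext.web.Router;
    import io.vertx.ext.web.handler.BodyHandler;

    public class MinimalServer {
        public static void main(String[] args) {
            Vertx vertx = Vertx.vertx();
            Router router = Router.router(vertx);
            //BodyHandler buffers request bodies so handlers can read them
            router.route().handler(BodyHandler.create());
            router.post("/echo").handler(ctx -> ctx.response().end(ctx.getBodyAsString()));
            //in Vert.x 3.x the Router is attached via requestHandler(router::accept)
            vertx.createHttpServer().requestHandler(router::accept).listen(8080);
        }
    }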

    Features the ingestion service needs to implement (a sample client request is sketched after this list):

    1. Path parsing: use the last segment of the request path as the appkey;

    2. Data caching: when Kafka cannot be reached, cache the data in a local cache directory;

    3. Security validation: check that the appkey in the request is legitimate;

    4. Automatic appkey refresh: fetch the latest appkey list at a fixed interval;

    5. IP enrichment: add an ip field to every record;

    6. Logging: record basic statistics as well as exceptions and errors.
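
    To make the HTTP contract concrete, here is a sketch of a client posting newline-delimited JSON to the service. It mirrors the /mininfo/v1/logs route in KafkaHttpServer below; localhost, port 20000 and the appkey demo_appkey are illustrative assumptions:

    package com.donews.data.example;

    import java.io.OutputStream;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.nio.charset.StandardCharsets;

    public class LogClient {
        public static void main(String[] args) throws Exception {
            //the last path segment is parsed as the appkey/topic; "demo_appkey" is hypothetical
            URL url = new URL("http://localhost:20000/mininfo/v1/logs/demo_appkey");
            HttpURLConnection conn = (HttpURLConnection) url.openConnection();
            conn.setRequestMethod("POST");
            conn.setDoOutput(true);
            //one JSON object per line: the server splits the body on "\n"
            String body = "{\"event\":\"login\"}\n{\"event\":\"logout\"}";
            try (OutputStream out = conn.getOutputStream()) {
                out.write(body.getBytes(StandardCharsets.UTF_8));
            }
            System.out.println("HTTP " + conn.getResponseCode());
        }
    }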

    [Figure: framework structure]

    [Figure: overall data computation]

    [Figure: computation flow]

    [Figure: code structure in IDEA]

    The main code is as follows:

    Configuration.java

    package com.donews.data;
    
    import com.typesafe.config.Config;
    import com.typesafe.config.ConfigFactory;
    
    /**
     * Created by reynold on 16-6-23.
     * 
     */
    public class Configuration {
        //loaded once from application.conf on the classpath
        public static final Config conf = ConfigFactory.load();
    }
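
    ConfigFactory.load() reads application.conf (plus any reference.conf) from the classpath, so all classes share one immutable Config instance. A small usage sketch, assuming the application.conf shown later in this post:

    package com.donews.data.example;

    import com.donews.data.Configuration;

    import java.util.concurrent.TimeUnit;

    public class ConfigDemo {
        public static void main(String[] args) {
            //values come from application.conf on the classpath
            int port = Configuration.conf.getInt("server.port");        //20000
            long delayMs = Configuration.conf.getDuration(
                    "server.counter.delay", TimeUnit.MILLISECONDS);     //"30s" -> 30000
            System.out.println(port + " " + delayMs);
        }
    }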

    Counter.java

    package com.donews.data;
    
    
    import io.vertx.core.Vertx;
    import io.vertx.core.logging.Logger;
    import io.vertx.core.logging.LoggerFactory;
    
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.atomic.AtomicLong;
    /**
     * Created by reynold on 16-6-22.
     * 
     */
    public class Counter {
        private Logger LOG = LoggerFactory.getLogger(Counter.class);
        AtomicLong messages = new AtomicLong(0L);
        AtomicLong bytes = new AtomicLong(0L);
        private long start = System.currentTimeMillis();
    
        private void reset() {
            messages.set(0L);
            bytes.set(0L);
            start = System.currentTimeMillis();
        }
    
        public void start(Vertx vertx) {
            LOG.info("start Counter");
            long delay = Configuration.conf.getDuration("server.counter.delay", TimeUnit.MILLISECONDS);
            //every `delay` milliseconds: log throughput and JVM memory, then reset the window
            vertx.setPeriodic(delay, h -> {
                long time = System.currentTimeMillis() - start;
                double rps = messages.get() * 1000.0 / time;
                double mbps = (bytes.get() * 1000.0 / 1024.0 / 1024.0) / time;
                Runtime runtime = Runtime.getRuntime();
                double totalMem = runtime.totalMemory() * 1.0 / 1024 / 1024;
                double maxMem = runtime.maxMemory() * 1.0 / 1024 / 1024;
                double freeMem = runtime.freeMemory() * 1.0 / 1024 / 1024;
                LOG.info("{0}:Message/S, {1}:MBytes/S", rps, mbps);
                LOG.info("totalMem:{0}MB maxMem:{1}MB freeMem:{2}MB", totalMem, maxMem, freeMem);
                reset();
            });
        }
    
    
    }

    KafkaHttpServer.java

    package com.donews.data;
    
    import com.alibaba.fastjson.JSON;
    import com.alibaba.fastjson.JSONObject;
    import io.vertx.core.Vertx;
    import io.vertx.core.http.HttpServer;
    import io.vertx.core.http.HttpServerResponse;
    import io.vertx.core.json.JsonArray;
    import io.vertx.core.json.JsonObject;
    import io.vertx.core.logging.Logger;
    import io.vertx.core.logging.LoggerFactory;
    import io.vertx.ext.web.Router;
    import io.vertx.ext.web.RoutingContext;
    import io.vertx.ext.web.handler.BodyHandler;
    
    import java.io.*;
    import java.sql.*;
    import java.time.Instant;
    import java.util.HashSet;
    import java.util.Set;
    import java.util.Timer;
    import java.util.TimerTask;
    import java.util.concurrent.atomic.AtomicInteger;
    
    /**
     * Created by reynold on 16-6-22.
     * 
     */
    
    public class KafkaHttpServer {
        private static final Logger LOG = LoggerFactory.getLogger(KafkaHttpServer.class);
        private final Counter statistic = new Counter();
        private static final String DBDRIVER = "com.mysql.jdbc.Driver";
        private static final String URL = Configuration.conf.getString("mysql.url");
        private static final String USER = Configuration.conf.getString("mysql.user");
        private static final String PASSWORD = Configuration.conf.getString("mysql.password");
        private static HashSet<String> appkeys = new HashSet<>();
        private static boolean deleteFile = true;
    
        private void error(HttpServerResponse response, String message) {
            response.setStatusCode(500).end(new JsonObject()
                    .put("code", 3)
                    .put("msg", message)
                    .encode());
        }
    
        private void ok(HttpServerResponse response, String message) {
            response.putHeader("Access-Control-Allow-Origin", "*");
            response.setStatusCode(200).end(new JsonObject()
                    .put("code", 0)
                    .put("msg", message)
                    .encode());
        }
    
        private void startService(int port) {
            KafkaProducerWrapper sender = new KafkaProducerWrapper();
            Vertx vertx = Vertx.vertx();
            HttpServer server = vertx.createHttpServer();
            Router router = Router.router(vertx);
            router.route().handler(BodyHandler.create());
            router.route("/mininfo/logs").handler(ctx -> {
                try {
                    JsonArray array = ctx.getBodyAsJsonArray();
                    String[] messages = new String[array.size()];
                    for (int i = 0; i < array.size(); i++) {
                        JsonObject message = array.getJsonObject(i);
                        message.put("ip", ctx.request().remoteAddress().host());
                        if (!message.containsKey("timestamp")) {
                            message.put("timestamp", Instant.now().toString());
                        }
                        messages[i] = array.getJsonObject(i).encode();
                    }
                    sendMessages(sender, ctx, "appstatistic_production", messages);
                } catch (Exception e) {
                    error(ctx.response(), e.getMessage());
                }
    
            });
            router.routeWithRegex("/mininfo/v1/logs/[^/]+").handler(routingContext -> {
                String path = routingContext.request().path();
                String topic = path.substring(path.lastIndexOf("/") + 1);
                LOG.info("现在处理的topic(appkey)为:" + topic);
                if (appkeys.contains(topic)) {
                    LOG.info("经过验证,该topic(appkey)有效");
                    String[] messages = routingContext.getBodyAsString().split("
    ");
                    //用于执行阻塞任务(有序执行和无序执行),默认顺序执行提交的阻塞任务
                    vertx.executeBlocking(future -> {
                        sendMessages(sender, routingContext, topic, messages);
                        future.complete();
                    }, result -> {
                    });
                } else {
                    LOG.info("您的topic(appkey)还没有配置,请在mysql中配置先");
                    error(routingContext.response(), "please configurate " + topic + "(appkey) in Mysql first! After 10mins it`ll take action");
                }
            });
            router.route("/mininfo/v1/ip").handler(ctx -> {
                LOG.info("x-real-for" + ctx.request().getHeader("x-real-for"));
                LOG.info("x-forwarded-for" + ctx.request().getHeader("x-forwarded-for"));
                ok(ctx.response(), ctx.request().getHeader("x-forwarded-for"));
            });
            router.route("/*").handler(ctx -> error(ctx.response(), "wrong! check your path..."));
            server.requestHandler(router::accept).listen(port, result -> {
                if (result.succeeded()) {
                    LOG.info("listen on port:{0}", String.valueOf(port));
                    this.statistic.start(vertx);
                } else {
                    LOG.error(result.cause());
                    vertx.close();
                }
            });
            //a shutdown hook runs before the JVM exits (similar to finally); here it closes the Kafka producer
            Runtime.getRuntime().addShutdownHook(new Thread(sender::close));
        }
    
        private void sendMessages(KafkaProducerWrapper sender, RoutingContext ctx, String topic, String[] messages) {
            AtomicInteger counter = new AtomicInteger(0);
            for (String message : messages) {
                if (message == null || "".equals(message)) {
                    //blank line: acknowledge and skip
                    ok(ctx.response(), "Success");
                    continue;
                }
                //add the client ip to the record's ip field if it is missing
                JSONObject jsonObject = JSON.parseObject(message);
                if (jsonObject.get("ip") == null) {
                    LOG.info("Adding ip field");
                    String ip;
                    String header = ctx.request().getHeader("x-forwarded-for");
                    if (!(header == null || header.trim().length() == 0 || header.trim().equals("null"))) {
                        ip = header.split(",")[0];
                    } else {
                        ip = ctx.request().remoteAddress().host();
                    }
                    jsonObject.put("ip", ip);
                    LOG.info("ip增加成功");
                }
                //topic, message, callback; Callback.onCompletion is implemented as a lambda
                sender.send(topic, jsonObject.toString(), (metadata, exception) -> {
                    if (exception != null) {
                        LOG.warn(exception);
                        String msg = new JsonObject()
                                .put("error", exception.getMessage())
                                .put("commit", counter.get())
                                .encode();
                        error(ctx.response(), msg);
                        cacheLocal(jsonObject.toString(), "/home/yuhui/httpkafka/data_bak/" + topic + ".txt");
                        LOG.info("连接kafka失败,写入cache缓存目录以备份数据");
                    } else {
                        statistic.messages.incrementAndGet();  // Counter
                        statistic.bytes.addAndGet(message.length());
                        if (counter.incrementAndGet() == messages.length) {
                            ok(ctx.response(), "Success");
                        }
                    }
                });
            }
        }
    
        /**
         * Cache a message locally after it failed to send to Kafka
         *
         * @param message   the message to cache
         * @param cachePath path of the local cache file
         */
        private void cacheLocal(String message, String cachePath) {
            //append mode preserves earlier cached records; try-with-resources closes the writer
            try (BufferedWriter bw = new BufferedWriter(new FileWriter(cachePath, true))) {
                bw.write(message);
                bw.newLine();
            } catch (IOException e) {
                LOG.error("Failed to cache message locally", e);
            }
        }
    
        /**
         * Send cached data to Kafka; delete the cache files on success, otherwise retry after 10 minutes
         *
         * @param path the [directory] holding the cached data
         */
        private static void sendToKafka(String path) {
            String message;
            KafkaProducerWrapper sender = new KafkaProducerWrapper();
            File file = new File(path);
            if (file.isDirectory()) {
                String[] fileList = file.list();
                if (fileList != null && fileList.length != 0) {
                    LOG.info("正在将缓存目录中的备份数据发送到kafka中...");
                    for (String str : fileList) {
                        deleteFile = true;  //reset per file; the send callback clears it on failure
                        String topic = str.split("\\.")[0];
                        try {
                            BufferedReader reader = new BufferedReader(new FileReader(path + str));
                            while ((message = reader.readLine()) != null) {
                                sender.send(topic, message, (metadata, exception) -> {
                                    if (metadata != null) {
                                        LOG.info("Re-sending cached backup records to Kafka one by one");
                                    } else {
                                        //will be retried on the next scheduled run
                                        LOG.error("Kafka connection error: will retry automatically in 10 minutes, " + exception.getMessage(), exception);
                                        deleteFile = false;
                                    }
                                });
                            }
                            //NOTE: send() callbacks fire asynchronously, so this check can run
                            //before every callback has completed and a late failure may slip through
                            if (deleteFile) {
                                LOG.info("Deleting cached backup data already sent to Kafka");
                                deleteFile(path, topic);
                                LOG.info("Deletion finished!");
                            }
                            }
                            reader.close();
                        } catch (IOException e) {
                            e.printStackTrace();
                        }
                    }
                } else {
                    LOG.info("缓存目录中没有备份文件");
                }
            }
        }
    
        private static void deleteFile(String path, String appkey) {
            String appkeyPath = path + "/" + appkey + ".txt";
            File file = new File(appkeyPath);
            if (file.delete()) {
                LOG.info("Deleted cached data for appkey " + appkey);
            } else {
                LOG.warn("Could not delete cached data for appkey " + appkey);
            }
        }
    
        private static Set<String> getAppkeys() {
            Set<String> appkeys = new HashSet<>();
            String sql = "select appkey from config_table";
            try {
                Class.forName(DBDRIVER);
            } catch (ClassNotFoundException e) {
                LOG.error("MySQL JDBC driver not found", e);
                return appkeys;
            }
            //try-with-resources closes the result set, statement and connection even on error
            try (Connection conn = DriverManager.getConnection(URL, USER, PASSWORD);
                 PreparedStatement ps = conn.prepareStatement(sql);
                 ResultSet rs = ps.executeQuery()) {
                while (rs.next()) {
                    appkeys.add(rs.getString(1));
                }
            } catch (SQLException e) {
                LOG.error("Failed to load appkeys from MySQL", e);
            }
            return appkeys;
        }
    
        public static void main(String[] args) throws Exception {
            Timer timer = new Timer();
            timer.schedule(new TimerTask() {
                @Override
                public void run() {
                    appkeys.addAll(getAppkeys());
                    LOG.info("同步完数据库中的appkey(每隔十分钟)");
                    sendToKafka("/home/leixingzhi7/httpkafka/data_bak/");
    //                sendToKafka("C:\Dell\UpdatePackage\log");
                }
            }, 0L, 10 * 60 * 1000L);
    
            try {
                int port = Configuration.conf.getInt("server.port");
                KafkaHttpServer front = new KafkaHttpServer();
                front.startService(port);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    KafkaProducerWrapper.java

    package com.donews.data;
    
    import com.typesafe.config.Config;
    import io.vertx.core.logging.Logger;
    import io.vertx.core.logging.LoggerFactory;
    import org.apache.kafka.clients.producer.Callback;
    import org.apache.kafka.clients.producer.KafkaProducer;
    import org.apache.kafka.clients.producer.ProducerRecord;
    
    
    import java.util.Properties;
    
    /**
     * Created by reynold on 16-6-22.
     * 
     */
    public class KafkaProducerWrapper {
        private Logger LOG = LoggerFactory.getLogger(KafkaProducerWrapper.class);
        private KafkaProducer<String, String> producer = init();
    
        private KafkaProducer<String, String> init() {
            Config conf = Configuration.conf.getConfig("kafka");
            Properties props = new Properties();
            props.put("bootstrap.servers", conf.getString("bootstrap.servers"));
            props.put("acks", conf.getString("acks"));
            props.put("retries", conf.getInt("retries"));
            props.put("batch.size", conf.getInt("batch.size"));
            props.put("linger.ms", conf.getInt("linger.ms"));
            props.put("buffer.memory", conf.getLong("buffer.memory"));
            props.put("key.serializer", conf.getString("key.serializer"));
            props.put("value.serializer", conf.getString("value.serializer"));
            LOG.info("KafkaProducer Properties: {0}", props.toString());
            return new KafkaProducer<>(props);
        }
    
        public void send(String topic, String message, Callback callback) {
            producer.send(new ProducerRecord<>(topic, message), callback);
        }
    
        public void close() {
            producer.close();
            LOG.info("Kafka Producer Closed");
        }
    
        public static void main(String[] args) {
            //KafkaProducerWrapper sender=new KafkaProducerWrapper();
            //sender.producer.partitionsFor("xxxxx").forEach(System.out::println);
        }
    }
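
    A usage sketch for the wrapper (demo_topic is a placeholder; delivery results arrive asynchronously in the callback):

    package com.donews.data.example;

    import com.donews.data.KafkaProducerWrapper;

    public class SendDemo {
        public static void main(String[] args) {
            KafkaProducerWrapper sender = new KafkaProducerWrapper();
            sender.send("demo_topic", "{\"event\":\"login\"}", (metadata, exception) -> {
                if (exception != null) {
                    exception.printStackTrace();                         //delivery failed
                } else {
                    System.out.println("offset=" + metadata.offset());   //delivery confirmed
                }
            });
            sender.close();  //close() flushes pending records before returning
        }
    }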

    application.conf

    server {
      port = 20000
      counter.delay = 30s
    }
    kafka {
      bootstrap.servers = "XXX"
      acks = all
      retries = 1
      batch.size = 1048576
      linger.ms = 1
      buffer.memory = 33554432
      key.serializer = "org.apache.kafka.common.serialization.StringSerializer"
      value.serializer = "org.apache.kafka.common.serialization.StringSerializer"
    }
    mysql {
      url = "jdbc:mysql://XXX/user_privileges"
      user = "XXX"
      password = "XXX"
    }
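
    A few notes on these producer settings (standard Kafka producer semantics, not specific to this project): acks = all waits for the full set of in-sync replicas to acknowledge each record; retries = 1 allows one automatic resend after a transient failure; batch.size (bytes) and linger.ms trade latency for batching; buffer.memory caps how many bytes the producer buffers before send() blocks. The XXX values are placeholders for the real broker and database addresses.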

    pom.xml

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <groupId>com.donews.data</groupId>
        <artifactId>kafkahttp</artifactId>
        <version>1.0-SNAPSHOT</version>
        <dependencies>
            <!-- https://mvnrepository.com/artifact/com.typesafe/config -->
            <dependency>
                <groupId>com.typesafe</groupId>
                <artifactId>config</artifactId>
                <version>1.3.0</version>
            </dependency>
    
            <dependency>
                <groupId>io.vertx</groupId>
                <artifactId>vertx-web</artifactId>
                <version>3.2.1</version>
            </dependency>
            <dependency>
                <groupId>org.apache.kafka</groupId>
                <artifactId>kafka-clients</artifactId>
                <version>0.9.0.1</version>
            </dependency>
            <dependency>
                <groupId>mysql</groupId>
                <artifactId>mysql-connector-java</artifactId>
                <version>5.1.38</version>
            </dependency>
            <dependency>
                <groupId>com.alibaba</groupId>
                <artifactId>fastjson</artifactId>
                <version>1.2.11</version>
            </dependency>
        </dependencies>
    
        <build>
            <plugins>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.5.1</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
    
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-shade-plugin</artifactId>
                    <version>2.4</version>
                    <configuration>
                        <filters>
                            <filter>
                                <artifact>*:*</artifact>
                                <excludes>
                                    <exclude>META-INF/*.SF</exclude>
                                    <exclude>META-INF/*.DSA</exclude>
                                    <exclude>META-INF/*.RSA</exclude>
                                </excludes>
                            </filter>
                        </filters>
                        <!-- put your configurations here -->
                    </configuration>
                    <executions>
                        <execution>
                            <phase>package</phase>
                            <goals>
                                <goal>shade</goal>
                            </goals>
    
                            <configuration>
                                <transformers>
                                    <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
                                    <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                        <mainClass>com.donews.data.KafkaHttpServer</mainClass>
                                    </transformer>
                                </transformers>
                                <outputFile>${project.build.directory}/${project.artifactId}-fat.jar</outputFile>
                            </configuration>
                        </execution>
                    </executions>
                </plugin>
            </plugins>
        </build>
    
    
    </project>
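
    With the shade plugin configured as above, mvn package produces target/kafkahttp-fat.jar with com.donews.data.KafkaHttpServer as the main class, so the service can be started with java -jar target/kafkahttp-fat.jar.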