  • nodejs+kafka+storm+hbase development

    1. Environment overview

     As shown in the figure, Node.js acts as the producer of the data source and generates messages that are sent to a Kafka queue; the red line marks the data flow in the local development environment (when developing locally, the Storm topology runs in local mode).

    2. Setting up the environment; I use Eclipse + Maven.

    1. Create a Maven project and modify its pom file as follows:

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
      <modelVersion>4.0.0</modelVersion>
      <groupId>com.h3c.storm</groupId>
      <artifactId>storm-samples</artifactId>
      <packaging>jar</packaging>
      <version>1.0-SNAPSHOT</version>
      <name>storm-kafka-test</name>
      <url>http://maven.apache.org</url>
      <dependencies>
        <dependency>
          <groupId>junit</groupId>
          <artifactId>junit</artifactId>
          <version>3.8.1</version>
          <scope>test</scope>
        </dependency>
        <dependency>  
            <groupId>jdk.tools</groupId>  
            <artifactId>jdk.tools</artifactId>  
            <version>1.7</version>  
            <scope>system</scope>  
            <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>  
        </dependency>    
        <dependency>
          <groupId>org.apache.storm</groupId>
          <artifactId>storm-core</artifactId>
          <version>0.10.0</version>
          <!-- keep storm out of the jar-with-dependencies -->
          <scope>provided</scope>
        </dependency>
        
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka_2.9.2</artifactId>
            <version>0.8.1.1</version>
            <exclusions>
                <exclusion>
                    <groupId>org.apache.zookeeper</groupId>
                    <artifactId>zookeeper</artifactId>
                </exclusion>
                <exclusion>
                    <groupId>log4j</groupId>
                    <artifactId>log4j</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
           
        <dependency>  
          <groupId>org.apache.storm</groupId>  
             <artifactId>storm-kafka</artifactId>  
              <version>0.9.2-incubating</version>  
        </dependency>
        
        <dependency>
          <groupId>org.apache.storm</groupId>
          <artifactId>storm-hbase</artifactId>
          <version>0.10.0</version>
        </dependency>
      </dependencies>
    </project>

    2. Example Node.js code for sending messages. Of course, first manually create a topic in Kafka that matches the topic used in the code; the topic I created here is "historyclients".

    var kafka = require('kafka-node');
    var Producer = kafka.Producer;
    var KeyedMessage = kafka.KeyedMessage;
    // ZooKeeper connection string for the Kafka cluster (kafka-node's Client connects via ZooKeeper)
    var conf = '172.27.8.111:2181,172.27.8.112:2181,172.27.8.119:2181';
    var client = new kafka.Client(conf);
    var producer = new Producer(client);
    
    var clientOnlineInfo = {"clientMAC":"0000-0000-0002",
                            "acSN":"210235A1AMB159000008",
                            "onLineTime":"2016-06-27 10:00:00"};
    
    var clientOnlineInfoStr = JSON.stringify(clientOnlineInfo);
    
    var msg = [
        { topic: 'historyclients', messages: clientOnlineInfoStr, partition: 0 }
    ];
    
    
    producer.on('ready', function () {
            producer.send(msg, function (err, data) {
                console.log("done!")
                console.log(data);
            });
    });
    
    producer.on('error', function (err) {
        console.error(err);
    });

    3. Spout code

    package com.h3c.storm;
    
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;
    import java.util.Properties;
    
    import backtype.storm.spout.SpoutOutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseRichSpout;
    import backtype.storm.tuple.Fields;
    import backtype.storm.tuple.Values;
    import kafka.consumer.ConsumerConfig;
    import kafka.consumer.ConsumerIterator;
    import kafka.consumer.KafkaStream;
    import kafka.javaapi.consumer.ConsumerConnector;
    
    public class KafkaSpout extends BaseRichSpout{
        private SpoutOutputCollector collector;
        private  ConsumerConnector consumer; 
        private  String topic; 
        Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap;
        
        private static ConsumerConfig createConsumerConfig()  
        {  
            Properties props = new Properties();  
            props.put("zookeeper.connect", "172.27.8.111:2181,172.27.8.112:2181,172.27.8.119:2181");  
            props.put("group.id", "group1");  
            props.put("zookeeper.session.timeout.ms", "40000");  
            props.put("zookeeper.sync.time.ms", "200");  
            props.put("auto.commit.interval.ms", "1000");  
            return new ConsumerConfig(props);  
        }   
        
        @Override
        public void open(Map conf, TopologyContext context,SpoutOutputCollector collector) {
            System.err.println("open!!!!!!!!!!!!!!!");
            this.collector = collector;
            
            /* create consumer */
            this.topic = "historyclients";
            this.consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig()); 
            
            /* topic count map, which means the map can include multiple topics */
            Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
            topicCountMap.put(topic, new Integer(1));  
            this.consumerMap = consumer.createMessageStreams(topicCountMap);  
        }
    
        @Override
        public void nextTuple() {

            KafkaStream<byte[], byte[]> stream = consumerMap.get(topic).get(0);
            ConsumerIterator<byte[], byte[]> it = stream.iterator();
            String toSay = "";
            // NOTE: hasNext() on the high-level consumer iterator blocks until a message
            // arrives, so this loop effectively never returns from nextTuple(). That is
            // acceptable for a demo, but a production spout should keep nextTuple()
            // non-blocking (or use the KafkaSpout that ships with storm-kafka).
            while (it.hasNext()) {
                toSay = new String(it.next().message());
                System.err.println("receive:" + toSay);
                this.collector.emit(new Values(toSay));
            }
        }
    
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            declarer.declare(new Fields("clientInfo"));
        }
    }
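    As a side note, the pom above already pulls in storm-kafka, which ships its own KafkaSpout that handles partition assignment and offset tracking for you. A minimal sketch of building that spout instead of the hand-rolled consumer above could look like the code below (the factory class name, the ZooKeeper offset root "/kafka-offsets" and the consumer id are made-up illustration values; the ZooKeeper addresses and topic are the ones used throughout this post):

    package com.h3c.storm;
    
    import backtype.storm.spout.SchemeAsMultiScheme;
    import storm.kafka.BrokerHosts;
    import storm.kafka.SpoutConfig;
    import storm.kafka.StringScheme;
    import storm.kafka.ZkHosts;
    
    public class BundledKafkaSpoutFactory {
    
        public static storm.kafka.KafkaSpout create() {
            // ZooKeeper ensemble used throughout this post
            BrokerHosts hosts = new ZkHosts("172.27.8.111:2181,172.27.8.112:2181,172.27.8.119:2181");
            // args: broker hosts, topic, ZK root for storing consumer offsets, consumer id
            SpoutConfig spoutConfig = new SpoutConfig(hosts, "historyclients", "/kafka-offsets", "historyclients-spout");
            // emit every Kafka message as a single string field named "str"
            spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
            return new storm.kafka.KafkaSpout(spoutConfig);
        }
    }

    Note that StringScheme declares its output field as "str", so if this spout were used, the mapper in the next step would have to read "str" instead of "clientInfo".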

    4. The mapper code the storm-hbase API requires you to implement

    package com.h3c.storm;
    
    import org.apache.storm.hbase.bolt.mapper.HBaseMapper;
    import org.apache.storm.hbase.common.ColumnList;
    
    import backtype.storm.tuple.Tuple;
    
    public class MyHBaseMapper implements HBaseMapper {
    
          public ColumnList columns(Tuple tuple) {
            
            ColumnList cols = new ColumnList();
            
            // arguments, in order: column family, column qualifier, value
            cols.addColumn("f1".getBytes(), "colMAC".getBytes(), tuple.getStringByField("clientInfo").getBytes());
            //System.err.println("BOLT + " + tuple.getStringByField("clientInfo"));
            
            //cols.addColumn("f1".getBytes(), "hhhhhhh".getBytes(), "0000-0000-0001".getBytes());
            //System.err.println("BOLT + " + tuple.getStringByField("clientInfo"));
            return cols;
          }
    
          public byte[] rowKey(Tuple tuple) {
                     
            //return tuple.getStringByField("clientInfo").getBytes();
            return "newRowKey".getBytes(); 
          }
    }
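    One caveat about the mapper above: rowKey() returns the constant "newRowKey", so every message overwrites the same HBase row. A hypothetical variant (not part of the original post) that parses the JSON produced by the Node.js code in step 2 and keys each row by MAC address plus online time could look like this; it assumes the json-simple library is on the classpath (Storm itself depends on it, otherwise add it to the pom):

    package com.h3c.storm;
    
    import org.apache.storm.hbase.bolt.mapper.HBaseMapper;
    import org.apache.storm.hbase.common.ColumnList;
    import org.json.simple.JSONObject;
    import org.json.simple.JSONValue;
    
    import backtype.storm.tuple.Tuple;
    
    public class ClientInfoHBaseMapper implements HBaseMapper {
    
        public ColumnList columns(Tuple tuple) {
            JSONObject info = (JSONObject) JSONValue.parse(tuple.getStringByField("clientInfo"));
            ColumnList cols = new ColumnList();
            // column family "f1", one column per JSON field
            cols.addColumn("f1".getBytes(), "clientMAC".getBytes(), String.valueOf(info.get("clientMAC")).getBytes());
            cols.addColumn("f1".getBytes(), "acSN".getBytes(), String.valueOf(info.get("acSN")).getBytes());
            cols.addColumn("f1".getBytes(), "onLineTime".getBytes(), String.valueOf(info.get("onLineTime")).getBytes());
            return cols;
        }
    
        public byte[] rowKey(Tuple tuple) {
            JSONObject info = (JSONObject) JSONValue.parse(tuple.getStringByField("clientInfo"));
            // one row per (client, online time) pair instead of a single fixed row
            return (info.get("clientMAC") + "_" + info.get("onLineTime")).getBytes();
        }
    }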

    5. Topology code

    package com.h3c.storm;
    
    import java.util.HashMap;
    import java.util.Map;
    
    import org.apache.storm.hbase.bolt.HBaseBolt;
    import org.apache.storm.hbase.bolt.mapper.HBaseMapper;
    
    import backtype.storm.Config;
    import backtype.storm.LocalCluster;
    import backtype.storm.StormSubmitter;
    import backtype.storm.topology.TopologyBuilder;
    import backtype.storm.utils.Utils;
    
    public class PersistTopology {
    
        private static final String KAFKA_SPOUT = "KAFKA_SPOUT";
        private static final String HBASE_BOLT = "HBASE_BOLT";
        
        public static void main(String[] args) throws Exception {
            
            /* define spout */
            KafkaSpout kafkaSpout = new KafkaSpout();
            
            // point hadoop.home.dir at the directory that contains bin\winutils.exe (see step 8)
            System.setProperty("hadoop.home.dir", "E:\\eclipse\\");
            
            /* define HBASE Bolt */
            HBaseMapper mapper = new MyHBaseMapper();
            HBaseBolt hbaseBolt = new HBaseBolt("historyclients", mapper).withConfigKey("hbase.conf");
            
            /* define topology*/
            TopologyBuilder builder = new TopologyBuilder();
            builder.setSpout(KAFKA_SPOUT, kafkaSpout);
            builder.setBolt(HBASE_BOLT, hbaseBolt).shuffleGrouping(KAFKA_SPOUT);
    
            Config conf = new Config();
            conf.setDebug(true);
    
            Map<String, Object> hbConf = new HashMap<String, Object>();
    //        if(args.length > 0){
    //            hbConf.put("hbase.rootdir", args[0]);
    //        }
            //hbConf.put("hbase.rootdir", "hdfs://172.27.8.111:8020/apps/hbase/data");
            conf.put("hbase.conf", hbConf);
            
            if (args != null && args.length > 0) {
                conf.setNumWorkers(3);
    
                StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
            } else {
    
                LocalCluster cluster = new LocalCluster();
                cluster.submitTopology("test", conf, builder.createTopology());
                Utils.sleep(600000);
                cluster.killTopology("test");
                cluster.shutdown();
            }
        }
    }
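    One step the post leaves implicit: the table that HBaseBolt writes to must already exist, with the column family the mapper uses, i.e. a table named "historyclients" with family "f1". A hypothetical one-off helper (class name made up here) that creates it through the HBase client API, which storm-hbase pulls in transitively, might look like this; it reads hbase-site.xml from the classpath (see step 6 below):

    package com.h3c.storm;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HColumnDescriptor;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.TableName;
    import org.apache.hadoop.hbase.client.HBaseAdmin;
    
    public class CreateHistoryClientsTable {
    
        public static void main(String[] args) throws Exception {
            // picks up hbase-site.xml from the classpath (step 6)
            Configuration conf = HBaseConfiguration.create();
            HBaseAdmin admin = new HBaseAdmin(conf);
            try {
                if (!admin.tableExists("historyclients")) {
                    HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("historyclients"));
                    desc.addFamily(new HColumnDescriptor("f1"));
                    admin.createTable(desc);
                }
            } finally {
                admin.close();
            }
        }
    }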

    6. Fetch the hbase-site.xml file from the cluster and add it to the project; it can be put on the build path.
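    For reference, the entries in that file that matter on the client side are mainly the ZooKeeper quorum, the client port and the parent znode. An illustrative fragment (hostnames taken from the hosts mapping in step 7; the znode parent depends on your distribution, so do copy the real file rather than hand-writing it):

    <configuration>
      <!-- ZooKeeper ensemble the HBase client connects to -->
      <property>
        <name>hbase.zookeeper.quorum</name>
        <value>node1.hde.h3c.com,node2.hde.h3c.com,node3.hde.h3c.com</value>
      </property>
      <property>
        <name>hbase.zookeeper.property.clientPort</name>
        <value>2181</value>
      </property>
      <!-- parent znode used by the HBase cluster; distribution dependent -->
      <property>
        <name>zookeeper.znode.parent</name>
        <value>/hbase-unsecure</value>
      </property>
    </configuration>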

    7. In C:\Windows\System32\drivers\etc, add the cluster's IP-to-hostname mappings to the hosts file:

    172.27.8.111 node1.hde.h3c.com node1
    172.27.8.112 node2.hde.h3c.com node2
    172.27.8.119 node3.hde.h3c.com node3

    8. How to fix java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries

    Download winutils.exe from the internet and put it somewhere convenient; for example I placed it under E:\eclipse\bin (the directory it sits in must be named "bin").

    Then just add this line to the code:

    System.setProperty("hadoop.home.dir", "E:\\eclipse\\");

    References

    http://www.tuicool.com/articles/r6ZZBjU
