1. Environment overview
As shown in the figure, Node.js acts as the data producer and sends messages to a Kafka queue; the red line marks the data flow in the local development setup (during local development, the Storm topology runs in local mode).
2. Environment setup: I use Eclipse + Maven
1. Create a Maven project, then modify the pom.xml as follows:
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>com.h3c.storm</groupId>
  <artifactId>storm-samples</artifactId>
  <packaging>jar</packaging>
  <version>1.0-SNAPSHOT</version>
  <name>storm-kafka-test</name>
  <url>http://maven.apache.org</url>

  <dependencies>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>jdk.tools</groupId>
      <artifactId>jdk.tools</artifactId>
      <version>1.7</version>
      <scope>system</scope>
      <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
    </dependency>
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-core</artifactId>
      <version>0.10.0</version>
      <!-- keep storm out of the jar-with-dependencies -->
      <scope>provided</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.kafka</groupId>
      <artifactId>kafka_2.9.2</artifactId>
      <version>0.8.1.1</version>
      <exclusions>
        <exclusion>
          <groupId>org.apache.zookeeper</groupId>
          <artifactId>zookeeper</artifactId>
        </exclusion>
        <exclusion>
          <groupId>log4j</groupId>
          <artifactId>log4j</artifactId>
        </exclusion>
      </exclusions>
    </dependency>
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-kafka</artifactId>
      <version>0.9.2-incubating</version>
    </dependency>
    <dependency>
      <groupId>org.apache.storm</groupId>
      <artifactId>storm-hbase</artifactId>
      <version>0.10.0</version>
    </dependency>
  </dependencies>
</project>
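Note that storm-core is marked provided so that, when the project is packaged as a jar-with-dependencies for cluster submission, Storm's own classes stay out of the jar. The pom above does not show the packaging plugin itself; a minimal sketch using the standard maven-assembly-plugin (the plugin version here is an assumption) would be:

<build>
  <plugins>
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-assembly-plugin</artifactId>
      <version>2.4</version> <!-- assumed version -->
      <configuration>
        <descriptorRefs>
          <descriptorRef>jar-with-dependencies</descriptorRef>
        </descriptorRefs>
      </configuration>
      <executions>
        <execution>
          <id>make-assembly</id>
          <phase>package</phase>
          <goals>
            <goal>single</goal>
          </goals>
        </execution>
      </executions>
    </plugin>
  </plugins>
</build>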
2. Sample Node.js code for sending messages. Before running it, manually create a Kafka topic matching the one used in the code; the topic I created here is "historyclients".
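For reference, a topic like this can be created with the kafka-topics.sh script shipped with Kafka (the partition and replication counts below are just assumptions for a test setup):

bin/kafka-topics.sh --create --zookeeper 172.27.8.111:2181 --replication-factor 1 --partitions 1 --topic historyclients

The producer code then looks like this: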
var kafka = require('kafka-node');
var Producer = kafka.Producer;
var KeyedMessage = kafka.KeyedMessage;

var conf = '172.27.8.111:2181,172.27.8.112:2181,172.27.8.119:2181';
var client = new kafka.Client(conf);
var producer = new Producer(client);

var clientOnlineInfo = {
    "clientMAC": "0000-0000-0002",
    "acSN": "210235A1AMB159000008",
    "onLineTime": "2016-06-27 10:00:00"
};
var clientOnlineInfoStr = JSON.stringify(clientOnlineInfo);

var msg = [
    { topic: 'historyclients', messages: clientOnlineInfoStr, partition: 0 }
];

producer.on('ready', function () {
    producer.send(msg, function (err, data) {
        console.log("done!");
        console.log(data);
    });
});

producer.on('error', function (err) {
    console.error(err);
});
3. Spout code
package com.h3c.storm;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;

public class KafkaSpout extends BaseRichSpout {

    private SpoutOutputCollector collector;
    private ConsumerConnector consumer;
    private String topic;
    Map<String, List<KafkaStream<byte[], byte[]>>> consumerMap;

    private static ConsumerConfig createConsumerConfig() {
        Properties props = new Properties();
        props.put("zookeeper.connect", "172.27.8.111:2181,172.27.8.112:2181,172.27.8.119:2181");
        props.put("group.id", "group1");
        props.put("zookeeper.session.timeout.ms", "40000");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        return new ConsumerConfig(props);
    }

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        System.err.println("open!!!!!!!!!!!!!!!");
        this.collector = collector;

        /* create consumer */
        this.topic = "historyclients";
        this.consumer = kafka.consumer.Consumer.createJavaConsumerConnector(createConsumerConfig());

        /* topic HashMap, which means the map can include multiple topics */
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(topic, new Integer(1));
        this.consumerMap = consumer.createMessageStreams(topicCountMap);
    }

    @Override
    public void nextTuple() {
        KafkaStream<byte[], byte[]> stream = consumerMap.get(topic).get(0);
        ConsumerIterator<byte[], byte[]> it = stream.iterator();
        String toSay = "";
        while (it.hasNext()) {
            toSay = new String(it.next().message());
            System.err.println("receive:" + toSay);
            this.collector.emit(new Values(toSay));
        }
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declare(new Fields("clientInfo"));
    }
}
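The pom already pulls in storm-kafka, which ships a ready-made KafkaSpout; the hand-rolled spout above works, but it blocks inside nextTuple() on the consumer iterator. As an alternative, a minimal sketch of wiring up the stock spout (storm-kafka 0.9.x API; note that its StringScheme emits a field named "str", not "clientInfo") might look like:

import backtype.storm.spout.SchemeAsMultiScheme;
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;

// ZooKeeper ensemble that the Kafka brokers register in
BrokerHosts hosts = new ZkHosts("172.27.8.111:2181,172.27.8.112:2181,172.27.8.119:2181");
// topic name, ZK root used for offset storage, and a consumer id (the last two are arbitrary choices)
SpoutConfig spoutConfig = new SpoutConfig(hosts, "historyclients", "/historyclients", "clients-spout");
spoutConfig.scheme = new SchemeAsMultiScheme(new StringScheme());
KafkaSpout kafkaSpout = new KafkaSpout(spoutConfig);

Since the custom spout above is also named KafkaSpout, the stock class would have to be referenced by its fully qualified name (storm.kafka.KafkaSpout) if both appear in the same topology class.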
4. Mapper code required by the storm-hbase API
package com.h3c.storm;

import org.apache.storm.hbase.bolt.mapper.HBaseMapper;
import org.apache.storm.hbase.common.ColumnList;
import backtype.storm.tuple.Tuple;

public class MyHBaseMapper implements HBaseMapper {

    public ColumnList columns(Tuple tuple) {
        ColumnList cols = new ColumnList();
        // arguments are, in order: column family, column qualifier, value
        cols.addColumn("f1".getBytes(), "colMAC".getBytes(), tuple.getStringByField("clientInfo").getBytes());
        //System.err.println("BOLT + " + tuple.getStringByField("clientInfo"));
        //cols.addColumn("f1".getBytes(), "hhhhhhh".getBytes(), "0000-0000-0001".getBytes());
        //System.err.println("BOLT + " + tuple.getStringByField("clientInfo"));
        return cols;
    }

    public byte[] rowKey(Tuple tuple) {
        //return tuple.getStringByField("clientInfo").getBytes();
        return "newRowKey".getBytes();
    }
}
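Because rowKey() here returns the constant "newRowKey", every tuple overwrites the same HBase row, which is fine for a smoke test but loses history. A sketch of a key that keeps one row per message (the hash-plus-timestamp layout is just an illustration, not part of the original code) could be:

public byte[] rowKey(Tuple tuple) {
    // hypothetical: derive the key from the message content plus arrival time so rows are not overwritten
    String key = tuple.getStringByField("clientInfo").hashCode() + "_" + System.currentTimeMillis();
    return key.getBytes();
}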
5. Topology code
package com.h3c.storm;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Random;

import org.apache.storm.hbase.bolt.HBaseBolt;
import org.apache.storm.hbase.bolt.mapper.HBaseMapper;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.*;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;
import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.ConsumerTimeoutException;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

public class PersistTopology {

    private static final String KAFKA_SPOUT = "KAFKA_SPOUT";
    private static final String HBASE_BOLT = "HBASE_BOLT";

    public static void main(String[] args) throws Exception {
        /* define spout */
        KafkaSpout kafkaSpout = new KafkaSpout();
        System.setProperty("hadoop.home.dir", "E:\\eclipse\\");

        /* define HBase bolt */
        HBaseMapper mapper = new MyHBaseMapper();
        HBaseBolt hbaseBolt = new HBaseBolt("historyclients", mapper).withConfigKey("hbase.conf");

        /* define topology */
        TopologyBuilder builder = new TopologyBuilder();
        builder.setSpout(KAFKA_SPOUT, kafkaSpout);
        builder.setBolt(HBASE_BOLT, hbaseBolt).shuffleGrouping(KAFKA_SPOUT);

        Config conf = new Config();
        conf.setDebug(true);

        Map<String, Object> hbConf = new HashMap<String, Object>();
        // if (args.length > 0) {
        //     hbConf.put("hbase.rootdir", args[0]);
        // }
        // hbConf.put("hbase.rootdir", "hdfs://172.27.8.111:8020/apps/hbase/data");
        conf.put("hbase.conf", hbConf);

        if (args != null && args.length > 0) {
            conf.setNumWorkers(3);
            StormSubmitter.submitTopology(args[0], conf, builder.createTopology());
        } else {
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology("test", conf, builder.createTopology());
            Utils.sleep(600000);
            cluster.killTopology("test");
            cluster.shutdown();
        }
    }
}
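HBaseBolt writes into an existing table, so the "historyclients" table with column family "f1" must be created before the topology runs. This can be done from the hbase shell, or with a one-off helper like the following sketch (it uses the standard HBase client API but is not part of the original project, and assumes hbase-site.xml is on the classpath as described in step 6):

package com.h3c.storm;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateHistoryClientsTable {
    public static void main(String[] args) throws Exception {
        // picks up hbase-site.xml from the classpath
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        if (!admin.tableExists("historyclients")) {
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("historyclients"));
            desc.addFamily(new HColumnDescriptor("f1"));
            admin.createTable(desc);
        }
        admin.close();
    }
}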
6. Fetch the hbase-site.xml file from the cluster and add it to the project; this can be configured in the Eclipse build path.
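For reference, the entries that usually matter for a client connection are the ZooKeeper quorum, client port, and znode parent; a trimmed-down example is shown below (the values are assumptions matching this cluster, use the actual file from the cluster as-is):

<configuration>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>node1.hde.h3c.com,node2.hde.h3c.com,node3.hde.h3c.com</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.clientPort</name>
    <value>2181</value>
  </property>
  <property>
    <name>zookeeper.znode.parent</name>
    <value>/hbase</value>
  </property>
</configuration>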
7. In C:\Windows\System32\drivers\etc, add the cluster's IP-to-hostname mappings to the hosts file:
172.27.8.111 node1.hde.h3c.com node1
172.27.8.112 node2.hde.h3c.com node2
172.27.8.119 node3.hde.h3c.com node3
8. How to fix "java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries."
Download winutils.exe from the internet and place it somewhere convenient; for example, I put it under E:\eclipse\bin. The directory that directly contains winutils.exe must be named "bin".
Then add this line to the code:
System.setProperty("hadoop.home.dir", "E:\\eclipse\\");