  • Connecting Flume + Kafka + Storm end to end

    0. Some of these steps are already packaged as scripts; some of the commands below are excerpts from those scripts.

    1. Start Hadoop (HDFS and YARN)

     $HADOOP_HOME/sbin/start-dfs.sh;$HADOOP_HOME/sbin/start-yarn.sh
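    A quick way to confirm the daemons came up is jps (a sketch; which processes appear on which node depends on your layout, and mini1 is assumed to be the master here):

    jps | egrep 'NameNode|ResourceManager'      # on mini1
    ssh mini2 "source /etc/profile; jps | egrep 'DataNode|NodeManager'"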

    2. Start ZooKeeper

    #hostnames are mini1 through mini3, so we can loop over the suffix
    echo "start zkserver"
    for i in 1 2 3
    do
    ssh mini$i "source /etc/profile;$ZK_HOME/bin/zkServer.sh start"
    done
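    The same loop pattern verifies the ensemble is healthy; one node should report leader and the others follower:

    for i in 1 2 3
    do
    ssh mini$i "source /etc/profile;$ZK_HOME/bin/zkServer.sh status"
    done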

    3. Start mysqld

    service mysqld start

    4. Start Kafka (every broker in the cluster must be started; a loop is sketched below)

    bin/kafka-server-start.sh config/server.properties
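    Since every broker has to come up, a loop in the same style as the ZooKeeper script works here too; nohup keeps each broker alive after the shell exits (a sketch assuming Kafka lives under $KAFKA_HOME on each node; the replication factor and partition count are illustrative). The orderMq topic used by the Flume sink below also has to exist, which kafka-topics.sh handles:

    for i in 1 2 3
    do
    ssh mini$i "source /etc/profile;nohup $KAFKA_HOME/bin/kafka-server-start.sh $KAFKA_HOME/config/server.properties > /dev/null 2>&1 &"
    done
    # create the topic that the Flume sink and the KafkaSpout both use
    $KAFKA_HOME/bin/kafka-topics.sh --create --zookeeper mini1:2181 --replication-factor 3 --partitions 3 --topic orderMq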

    5. Start Storm

    On the machine named in nimbus.host, start the nimbus service:

    nohup ./storm nimbus &

    On the same nimbus.host machine, start the ui service:

    nohup ./storm ui &

    On the other machines, start the supervisor service (a consolidated start script is sketched after these commands):

    nohup ./storm supervisor &
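    The three services can also be brought up from one node in the style of the ZooKeeper script (a sketch assuming nimbus and ui run on mini1, supervisors on mini2 and mini3, and Storm installed under $STORM_HOME):

    ssh mini1 "source /etc/profile;nohup $STORM_HOME/bin/storm nimbus > /dev/null 2>&1 &"
    ssh mini1 "source /etc/profile;nohup $STORM_HOME/bin/storm ui > /dev/null 2>&1 &"
    for i in 2 3
    do
    ssh mini$i "source /etc/profile;nohup $STORM_HOME/bin/storm supervisor > /dev/null 2>&1 &"
    done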

    6. Start Flume

    #exec.conf
     
    a1.channels = r1
    a1.sources = c1
    a1.sinks = k1
    
    #a1.sources.c1.type = spooldir  # a spooldir source works too when real-time requirements are loose
    #a1.sources.c1.channels = r1
    #a1.sources.c1.spoolDir = /opt/flumeSpool/
    #a1.sources.c1.fileHeader = false
    
    a1.sources.c1.type = exec
    a1.sources.c1.command = tail -F /home/hadoop/kafkastudy/data/flume_sources/click_log/1.log
    a1.sources.c1.channels = r1
    
    a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
    a1.sinks.k1.topic = orderMq
    a1.sinks.k1.brokerList = mini1:9092,mini2:9092,mini3:9092
    a1.sinks.k1.requiredAcks = 1
    a1.sinks.k1.batchSize = 20
    a1.sinks.k1.channel = r1
    
    a1.channels.r1.type   = memory
    a1.channels.r1.capacity = 10000
    a1.channels.r1.transactionCapacity = 1000

    bin/flume-ng agent --conf conf --conf-file conf/myconf/exec.conf --name a1 -Dflume.root.logger=INFO,console
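    A quick smoke test before wiring in Storm: append a line to the tailed file and watch it come out of the console consumer (a sketch; run the consumer from the Kafka directory on any broker):

    echo "hello-flume" >> /home/hadoop/kafkastudy/data/flume_sources/click_log/1.log
    bin/kafka-console-consumer.sh --zookeeper mini1:2181 --topic orderMq --from-beginning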

    7. Start the data generator

    #!/bin/bash
    for((i=0;i<50000;i++))
    do
    echo "msg-"+$i >> /home/hadoop/kafkastudy/data/flume_sources/click_log/1.log
    done
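    Saved as, say, click_log_gen.sh (the filename is arbitrary), it runs while Flume tails the file:

    chmod +x click_log_gen.sh
    ./click_log_gen.sh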

    8. Watch the topology in the Storm UI at mini1:8080

    Summary

    a. The generator and Flume are linked through exec.conf: Flume tails the file that the generator writes, so that file acts as the data source.

    b. Flume and Kafka are also linked in exec.conf (the KafkaSink). The messages can be checked with Kafka's console consumer:

    bin/kafka-console-consumer.sh --zookeeper mini1:2181  --topic orderMq

    c. Kafka and Storm are linked by the program we run on Storm (the kafkaAndStorm2 topology below), which configures a KafkaSpout and also carries our own business logic.

    d. The topology and bolt code:

    package kafkaAndStorm2;
    
    import backtype.storm.Config;
    import backtype.storm.LocalCluster;
    import backtype.storm.StormSubmitter;
    import backtype.storm.generated.AlreadyAliveException;
    import backtype.storm.generated.InvalidTopologyException;
    import backtype.storm.topology.TopologyBuilder;
    import storm.kafka.BrokerHosts;
    import storm.kafka.KafkaSpout;
    import storm.kafka.SpoutConfig;
    import storm.kafka.ZkHosts;
    
    /**
     * Reads messages from the orderMq topic via a KafkaSpout and hands them to MyKafkaBolt2.
     */
    public class KafkaAndStormTopologyMain {
        public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException, InterruptedException {
            TopologyBuilder topologyBuilder = new TopologyBuilder();
    
            // SpoutConfig args: ZooKeeper ensemble, topic, ZK root for offset storage, spout id
            SpoutConfig config = new SpoutConfig(new ZkHosts("mini1:2181,mini2:2181,mini3:2181"),
                    "orderMq",
                    "/mykafka",
                    "kafkaSpout");
            topologyBuilder.setSpout("kafkaSpout",new KafkaSpout(config),1 );
            topologyBuilder.setBolt("mybolt1",new MyKafkaBolt2(),1).shuffleGrouping("kafkaSpout");
    
            Config conf = new Config();
            // uncomment to print debug info
            // conf.setDebug(true);
            if (args!=null && args.length>0) {
                StormSubmitter.submitTopology(args[0], conf, topologyBuilder.createTopology());
            }else {
                LocalCluster localCluster = new LocalCluster();
                localCluster.submitTopology("storm2kafka", conf, topologyBuilder.createTopology());
            }
        }
    
    
    }
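    MyKafkaBolt2, wired in above, simply prints every message the spout hands it: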
    package kafkaAndStorm2;
    
    import backtype.storm.task.OutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseRichBolt;
    import backtype.storm.tuple.Tuple;
    
    import java.util.Map;
    
    /**
     * Prints every message it receives from the KafkaSpout.
     */
    public class MyKafkaBolt2 extends BaseRichBolt {
        private OutputCollector collector;

        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
        }

        @Override
        public void execute(Tuple input) {
            // the KafkaSpout emits the raw message bytes as the first tuple field
            byte[] value = (byte[]) input.getValue(0);
            String msg = new String(value);
            System.out.println(Thread.currentThread().getId() + "  msg  " + msg);
            // ack the tuple; otherwise the KafkaSpout replays it after the timeout
            collector.ack(input);
        }
    
    
        @Override
        public void cleanup() {
        }
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
        }
        @Override
        public Map<String, Object> getComponentConfiguration() {
            return null;
        }
    }
    

      Maven dependencies (these may need tweaking based on the errors you see):

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
    
        <groupId>cn.itcast.learn</groupId>
        <artifactId>kafka2Strom</artifactId>
        <version>1.0-SNAPSHOT</version>
    
        <dependencies>
            <dependency>
                <groupId>org.apache.storm</groupId>
                <artifactId>storm-core</artifactId>
                <version>0.9.5</version>
            <!-- provided: the cluster supplies storm-core; remove for LocalCluster runs from the IDE -->
            <scope>provided</scope>
            </dependency>
            <dependency>
                <groupId>org.apache.storm</groupId>
                <artifactId>storm-kafka</artifactId>
                <version>0.9.5</version>
                <exclusions>
                    <exclusion>
                        <groupId>org.slf4j</groupId>
                        <artifactId>slf4j-log4j12</artifactId>
                    </exclusion>
                    <exclusion>
                        <groupId>org.slf4j</groupId>
                        <artifactId>slf4j-api</artifactId>
                    </exclusion>
                </exclusions>
            </dependency>
            <dependency>
                <groupId>org.clojure</groupId>
                <artifactId>clojure</artifactId>
                <version>1.5.1</version>
            </dependency>
            <dependency>
                <groupId>org.apache.kafka</groupId>
                <artifactId>kafka_2.8.2</artifactId>
                <version>0.8.1</version>
                <exclusions>
                    <exclusion>
                        <artifactId>jmxtools</artifactId>
                        <groupId>com.sun.jdmk</groupId>
                    </exclusion>
                    <exclusion>
                        <artifactId>jmxri</artifactId>
                        <groupId>com.sun.jmx</groupId>
                    </exclusion>
                    <exclusion>
                        <artifactId>jms</artifactId>
                        <groupId>javax.jms</groupId>
                    </exclusion>
                    <exclusion>
                        <groupId>org.apache.zookeeper</groupId>
                        <artifactId>zookeeper</artifactId>
                    </exclusion>
                    <exclusion>
                        <groupId>org.slf4j</groupId>
                        <artifactId>slf4j-log4j12</artifactId>
                    </exclusion>
                    <exclusion>
                        <groupId>org.slf4j</groupId>
                        <artifactId>slf4j-api</artifactId>
                    </exclusion>
                </exclusions>
            </dependency>
            <dependency>
                <groupId>com.google.code.gson</groupId>
                <artifactId>gson</artifactId>
                <version>2.4</version>
            </dependency>
            <dependency>
                <groupId>redis.clients</groupId>
                <artifactId>jedis</artifactId>
                <version>2.7.3</version>
            </dependency>
        </dependencies>
    
        <build>
            <plugins>
                <plugin>
                    <artifactId>maven-assembly-plugin</artifactId>
                    <configuration>
                        <descriptorRefs>
                            <descriptorRef>jar-with-dependencies</descriptorRef>
                        </descriptorRefs>
                        <archive>
                            <manifest>
                                <mainClass>kafkaAndStorm2.KafkaAndStormTopologyMain</mainClass>
                            </manifest>
                        </archive>
                    </configuration>
                    <executions>
                        <execution>
                            <id>make-assembly</id>
                            <phase>package</phase>
                            <goals>
                                <goal>single</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    
    </project>
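    With the assembly plugin configured above, packaging and submitting to the cluster looks like this (a sketch; the jar name follows the artifactId and version above, and the topology name argument is arbitrary):

    mvn clean package
    storm jar target/kafka2Strom-1.0-SNAPSHOT-jar-with-dependencies.jar kafkaAndStorm2.KafkaAndStormTopologyMain orderMq2storm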