zoukankan      html  css  js  c++  java
  • Storm手写WordCount

    建立一个maven项目,在pom.xml中进行如下配置:

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <groupId>cn.darrenchan</groupId>
        <artifactId>StormDemo</artifactId>
        <version>0.0.1-SNAPSHOT</version>
        <name>StormDemo</name>
    
        <dependencies>
            <dependency>
                <groupId>org.apache.storm</groupId>
                <artifactId>storm-core</artifactId>
                <version>0.9.5</version>
                <!--<scope>provided</scope> -->
            </dependency>
        </dependencies>
        <build>
            <plugins>
                <plugin>
                    <artifactId>maven-assembly-plugin</artifactId>
                    <configuration>
                        <descriptorRefs>
                            <descriptorRef>jar-with-dependencies</descriptorRef>
                        </descriptorRefs>
                        <archive>
                            <manifest>
                                <mainClass>cn.itcast.bigdata.hadoop.mapreduce.wordcount.WordCount</mainClass>
                            </manifest>
                        </archive>
                    </configuration>
                    <executions>
                        <execution>
                            <id>make-assembly</id>
                            <phase>package</phase>
                            <goals>
                                <goal>single</goal>
                            </goals>
                        </execution>
                    </executions>
                </plugin>
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <configuration>
                        <source>1.7</source>
                        <target>1.7</target>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    
    </project>

    项目目录为:

    MySpout.java:

    package cn.darrenchan.storm;
    
    import java.util.Map;
    
    import backtype.storm.spout.SpoutOutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseRichSpout;
    import backtype.storm.tuple.Fields;
    import backtype.storm.tuple.Values;
    
    public class MySpout extends BaseRichSpout {
        
        private SpoutOutputCollector collector;
    
        //storm框架不停地调用nextTuple方法
    //values继承ArrayList
    @Override public void nextTuple() { collector.emit(new Values("i am lilei love hanmeimei")); } //初始化方法 @Override public void open(Map config, TopologyContext context, SpoutOutputCollector collector) { this.collector = collector; } //声明本spout组件发送出去的tuple中的数据的字段名 @Override public void declareOutputFields(OutputFieldsDeclarer declarer) { declarer.declare(new Fields("love")); } }

    MySplitBolt.java:

    package cn.darrenchan.storm;
    
    import java.util.Map;
    
    import backtype.storm.task.OutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseRichBolt;
    import backtype.storm.tuple.Fields;
    import backtype.storm.tuple.Tuple;
    import backtype.storm.tuple.Values;
    
    public class MySplitBolt extends BaseRichBolt {
        
        private OutputCollector collector;
    
        //storm框架不停地调用,传入参数是tutle
        @Override
        public void execute(Tuple input) {
            String line = input.getString(0);
            String[] words = line.split(" ");
            for (String word : words) {
    //Values有两个,对应下面Fields有两个 collector.emit(
    new Values(word, 1)); } } //初始化方法 @Override public void prepare(Map config, TopologyContext context, OutputCollector collector) { this.collector = collector; } @Override public void declareOutputFields(OutputFieldsDeclarer declarer) {
    //Fields有两个,对应上面Values有两个 declarer.declare(
    new Fields("word", "num")); } }

    MyCountBolt.java:

    package cn.darrenchan.storm;
    
    import java.util.HashMap;
    import java.util.Map;
    
    import backtype.storm.task.OutputCollector;
    import backtype.storm.task.TopologyContext;
    import backtype.storm.topology.OutputFieldsDeclarer;
    import backtype.storm.topology.base.BaseRichBolt;
    import backtype.storm.tuple.Tuple;
    
    public class MyCountBolt extends BaseRichBolt {
        
        private OutputCollector collector;
        private Map<String, Integer> map;
    
        @Override
        public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
            this.collector = collector;
            map = new HashMap<String, Integer>();
        }
    
        @Override
        public void execute(Tuple input) {
            String word = input.getString(0);
            Integer num = input.getInteger(1);
            if(map.containsKey(word)){
                map.put(word, map.get(word) + num);
            } else {
                map.put(word, 1);
            }
            
            System.out.println(map);
        }
    
        @Override
        public void declareOutputFields(OutputFieldsDeclarer declarer) {
            
        }
    
    }

    WordCountTopoloyMain.java:

    package cn.darrenchan.storm;
    
    import backtype.storm.Config;
    import backtype.storm.LocalCluster;
    import backtype.storm.topology.TopologyBuilder;
    import backtype.storm.tuple.Fields;
    
    public class WordCountTopoloyMain {
        public static void main(String[] args) throws Exception {
            //1.准备一个TopologyBuilder
            TopologyBuilder builder = new TopologyBuilder();
            builder.setSpout("mySpout", new MySpout(), 1);
            builder.setBolt("mySplitBolt", new MySplitBolt(), 2).shuffleGrouping("mySpout");
            builder.setBolt("myCountBolt", new MyCountBolt(), 2).fieldsGrouping("mySplitBolt", new Fields("word"));
            
            //2.创建一个configuration,用来指定当前的topology需要的worker的数量
            Config config = new Config();
            config.setNumWorkers(4);
            
            //3.任务提交 两种模式————本地模式和集群模式
            //集群模式
            //StormSubmitter.submitTopology("myWordCount", config, builder.createTopology());
            //本地模式
            LocalCluster localCluster = new LocalCluster();
            localCluster.submitTopology("myWordCount", config, builder.createTopology());
        }
    }

    三种求wordcount方式 比较:

    整体运行架构图:

  • 相关阅读:
    MySQL SQL语句总结
    JavaFx通用查询、跳转创建、修改页面、关闭舞台工具类
    反射工具类
    SpringBoot将数据写入word模板,生成word文档
    vscode设置中文的方法
    Flex 布局
    处理程序“ExtensionlessUrlHandler-Integrated-4.0”在其模块列表中有一个错误模块“ManagedPipelineHandler” •使用了托管的处理程序,但是未安装或未完整安装 ASP.NET。
    第一周助教总结
    JS时间转换
    ES 安装常见错误
  • 原文地址:https://www.cnblogs.com/DarrenChan/p/7125687.html
Copyright © 2011-2022 走看看