zoukankan      html  css  js  c++  java
  • storm和kafka的wordCount

    这个是在window环境下面安装的kafka

    • 下载pom依赖

        <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-kafka-client</artifactId>
            <version>1.1.1</version>
          </dependency>
          <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>0.10.0.0</version>
          </dependency>
          <dependency>
            <groupId>org.apache.storm</groupId>
            <artifactId>storm-core</artifactId>
            <version>1.1.1</version>
            <!--  本地测试注释集群运行打开 -->
            <!--  <scope>provided</scope>-->
          </dependency>
      

    编写代码

    • 编写SplitSentenceBolt
      public class SplitSentenceBolt extends BaseRichBolt {
      private OutputCollector collector;

      	@Override
      	public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
      		this.collector=outputCollector;
      	}
      
      	@Override
      	public void execute(Tuple tuple) {
      		//String sentece = tuple.getStringByField("sentence");
      		String sentece=tuple.getString(4);
      		String[] words = sentece.split(" ");
      		for (String word:words){
      			collector.emit(new Values(word));
      		}
      	}
      
      	@Override
      	public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
      		outputFieldsDeclarer.declare(new Fields("words"));
      	}
      }
      
    • 编写WordCountBolt
      public class WordCountBolt extends BaseRichBolt {

      	private  OutputCollector collector;
      	private HashMap<String,Long> counts =null;
      
      	@Override
      	public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
      		this.collector = outputCollector;
      		this.counts = new HashMap<>();
      	}
      
      	@Override
      	public void execute(Tuple tuple) {
      		 String word = tuple.getStringByField("words");
      		// String word =tuple.getString(0);
      		Long count=this.counts.get(word);
      		if(count==null){
      			count=0L;
      		}
      		count++;
      		//出现就添加到map中,word相同的,会覆盖掉 所以最后的word就是准确的数据
      		this.counts.put(word,count);
      		this.collector.emit(new Values(word,count));
      	}
      
      	@Override
      	public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
      			outputFieldsDeclarer.declare(new Fields("word","count"));
      	}
      }
      
    • 编写ReportBolt
      public class ReportBolt extends BaseRichBolt {
      private HashMap<String,Long> counts=null;
      @Override
      public void prepare(Map map, TopologyContext topologyContext, OutputCollector outputCollector) {
      this.counts=new HashMap<String,Long>();
      }

      	@Override
      	public void execute(Tuple input) {
      		String word=input.getStringByField("word");
      		Long count=input.getLongByField("count");
      		this.counts.put(word, count);
      
      
      		System.out.println("--------FINAL COUNTS--------");
      		List<String> keys=new ArrayList<String>();
      		keys.addAll(this.counts.keySet());
      		Collections.sort(keys);
      		for(String key:keys){
      			System.out.println(key+":"+this.counts.get(key));
      		}
      		System.out.println("----------------------------");
      	}
      
      	@Override
      	public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) {
      
      	}
      }
      
    • 编写Topology
      public class MainTopology {
      public static void main(String[] args)throws Exception {
      TopologyBuilder builder = new TopologyBuilder();
      KafkaSpoutConfig.Builder<String, String> kafkabuilder = KafkaSpoutConfig.builder("localhost:9092", "test");

      		//设置kafka属于哪个组
      		kafkabuilder.setGroupId("testgroup");
      		//创建kafkaspoutConfig
      		KafkaSpoutConfig<String, String> build = kafkabuilder.build();
      		//通过kafkaspoutconfig获取kafkaspout
      		KafkaSpout<String, String> kafkaSpout = new KafkaSpout<>(build);
      		//设置四个线程接收数据
      		builder.setSpout("kafkaspout",kafkaSpout,4);
      //		builder.setBolt("printBolt", new PrintBolt(),2).localOrShuffleGrouping("kafkaspout");
      
      		builder.setBolt("split-bolt",new SplitSentenceBolt(),2).setNumTasks(4).shuffleGrouping("kafkaspout");
      		// 有时候我们需要将特定数据的tuple路由到特殊的bolt实例中,在此我们使用fieldsGrouping
      		// 来保证所有"word"字段值相同的tuple会被路由到同一个WordCountBolt实例中
      		builder.setBolt("count-bolt",new WordCountBolt(),2).fieldsGrouping("split-bolt",new Fields("words"));
      		builder.setBolt("report-bolt",new ReportBolt()).globalGrouping("count-bolt");
      
      		Config config=new Config();
      		config.setDebug(false);
      		config.setNumWorkers(2);
      		LocalCluster cluster =new LocalCluster();
      		cluster.submitTopology("kafkaspout",config,builder.createTopology());
      
      	}
  • 相关阅读:
    Android应用之个人应用软件开发(2)【签到功能和记账】
    抽象类判断日期能否被2整除
    Android应用之个人应用软件开发(3)【SQLite数据库及理财功能实现】
    移动终端网页游戏移植研发框架【服务器及客户端交互处理】
    DirectX学习资料
    列宁的故事
    Managed DirectX +C# 开发(入门篇)(七)
    Managed DirectX +C# 开发(入门篇)(六)
    Managed DirectX +C# 开发(入门篇)(三)
    C#不同窗口类中的变量相互调用
  • 原文地址:https://www.cnblogs.com/zhuguangzhe/p/9229808.html
Copyright © 2011-2022 走看看