zoukankan      html  css  js  c++  java
  • Spark Streaming + Kafka 整合向导之createDirectStream

    启动zk: zkServer.sh start

    启动kafka:kafka-server-start.sh $KAFKA_HOME/config/server.properties

    创建一个topic:kafka-topics.sh --create --zookeeper node1:2181 --replication-factor 1 --partitions 1 --topic test

    启动一个生产者:kafka-console-producer.sh --broker-list node1:9092 --topic test

    运行代码测试:

    package com.lin.spark
    
    import org.apache.kafka.common.serialization.StringDeserializer
    import org.apache.spark.SparkConf
    import org.apache.spark.rdd.RDD
    import org.apache.spark.streaming.{Seconds, StreamingContext}
    import org.apache.spark.streaming.kafka010._
    import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
    import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
    
    /**
      * Minimal Spark Streaming + Kafka 0.10 direct-stream example.
      *
      * Subscribes to the `test` topic on `node1:9092`, prints every (key, value) pair of each
      * 5-second micro-batch together with the Kafka offset range of every partition in the
      * batch, and commits those offsets back to Kafka only after the batch's output has run.
      */
    object Halo {
      /**
        * Builds the streaming context, wires up the Kafka direct stream and blocks until the
        * streaming job terminates.
        *
        * @param args command-line arguments (unused)
        */
      def main(args: Array[String]): Unit = {
        val kafkaParams = Map[String, Object](
          "bootstrap.servers" -> "node1:9092",
          "key.deserializer" -> classOf[StringDeserializer],
          "value.deserializer" -> classOf[StringDeserializer],
          "group.id" -> "use_a_separate_group_id_for_each_stream",
          "auto.offset.reset" -> "latest",
          // Auto-commit is disabled on purpose: the Kafka consumer would otherwise commit offsets
          // for records that were polled but whose Spark output operation has not run yet, so a
          // failure could silently skip data. Offsets are committed explicitly below instead.
          "enable.auto.commit" -> (false: java.lang.Boolean)
        )
    
        val conf = new SparkConf().setAppName("Halo").setMaster("local[2]")
        // One micro-batch every 5 seconds.
        val ssc = new StreamingContext(conf, Seconds(5))
    
        val topics = Array("test")
        val stream = KafkaUtils.createDirectStream[String, String](
          ssc,
          PreferConsistent,
          Subscribe[String, String](topics, kafkaParams)
        )
    
        stream.foreachRDD { rdd =>
          // The offset ranges have to be taken from the RDD handed over by the stream itself,
          // before any transformation: the cast to HasOffsetRanges only succeeds on that RDD.
          val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
    
          // Processing logic: project every consumer record to a (key, value) pair and print it.
          val keyedRecords: RDD[(String, String)] = rdd.map(record => (record.key, record.value))
          keyedRecords.foreach(println)
    
          // Print topic, partition and the [fromOffset, untilOffset) range consumed in this batch.
          offsetRanges.foreach { o =>
            println(s"${o.topic}  ${o.partition} ${o.fromOffset} ${o.untilOffset}")
          }
    
          // Queue the commit only after the output above has been executed, so the consumer
          // group never advances past records that were not processed.
          stream.asInstanceOf[CanCommitOffsets].commitAsync(offsetRanges)
        }
    
        ssc.start()
        ssc.awaitTermination()
      }
    }

    参考:

    http://spark.apache.org/docs/2.2.0/streaming-kafka-0-10-integration.html

    https://cloud.tencent.com/developer/article/1355430

  • 相关阅读:
    HYSBZ 1500 [NOI2005]维修数列 splay
    The 15th Zhejiang University Programming Contest
    工作小助手-v1.0正式上线,欢迎体验!!!
    登录窗体登录失败但是MainForm依然弹出无法结束的解决方法
    报错'cannot change visible in onshow or onhide'
    release模式发布软件的方法
    发布软件时因为窗体自动加载次序不对导致报错00000000
    修改类别 (类实现)两种方法
    从记事本导入记录
    快速粘贴
  • 原文地址:https://www.cnblogs.com/linkmust/p/10990848.html
Copyright © 2011-2022 走看看