zoukankan      html  css  js  c++  java
  • spark2.3 消费kafka0.10数据

    官网介绍

    http://spark.apache.org/docs/2.3.0/streaming-kafka-0-10-integration.html#creating-a-direct-stream

    案例pom.xml依赖

        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-streaming_2.11</artifactId>
          <version>2.3.0</version>
          <!--      <scope>provided</scope>   -->
        </dependency>
        <dependency>
          <groupId>org.apache.spark</groupId>
          <artifactId>spark-streaming-kafka-0-10_2.11</artifactId>
          <version>2.3.0</version>
        </dependency>
    package SpartStreamingaiqiyi
    import org.apache.spark._
    import org.apache.spark.streaming._
    import org.apache.kafka.clients.consumer.ConsumerRecord
    import org.apache.kafka.common.serialization.StringDeserializer
    import org.apache.spark.sql.SparkSession
    import org.apache.spark.streaming.kafka010._
    import org.apache.spark.streaming.kafka010.LocationStrategies.PreferConsistent
    import org.apache.spark.streaming.kafka010.ConsumerStrategies.Subscribe
    
    
    /**
     * Example Spark Streaming job that reads string records from the Kafka topic
     * "aiqiyi" through the kafka-0-10 direct stream API and prints the record
     * values of every batch.
     */
    object test {
      /**
       * Builds the streaming pipeline, starts it and blocks until it terminates.
       *
       * @param args command-line arguments; not used
       */
      def main(args: Array[String]): Unit = {
        // Run locally on all available cores, with Kryo as the serializer.
        val spark = SparkSession.builder()
          .appName("aiqiyi")
          .master("local[*]")
          .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
          .getOrCreate()
        val sc = spark.sparkContext

        // Backslashes must be escaped inside a normal string literal; the
        // unescaped form ("\I", "\a", "\c") is rejected by the compiler as an
        // invalid escape sequence.
        val checkpointDir = "F:\\IdeaWorkspace\\aiqiyi\\ck"

        // Streaming context on top of the session's SparkContext with a
        // 5-second batch interval.
        val ssc: StreamingContext = new StreamingContext(sc, Seconds(5))
        ssc.checkpoint(checkpointDir)

        val topics = Array("aiqiyi")

        // Kafka consumer configuration: keys and values are decoded as strings.
        // Auto-commit is disabled and this job never commits offsets to Kafka
        // itself.
        val kafkaParams = Map[String, Object](
          "bootstrap.servers" -> "dip005:9092,dip006:9092,dip007:9092",
          "key.deserializer" -> classOf[StringDeserializer],
          "value.deserializer" -> classOf[StringDeserializer],
          "group.id" -> "use_a_separate_group_id_for_each_stream",
          "auto.offset.reset" -> "latest",
          "enable.auto.commit" -> (false: java.lang.Boolean)
        )

        val stream = KafkaUtils.createDirectStream[String, String](
          ssc,
          PreferConsistent,
          Subscribe[String, String](topics, kafkaParams)
        )

        // Keep only the payload of each ConsumerRecord and print a sample of
        // every batch to stdout.
        val resultDStream = stream.map(_.value())
        resultDStream.print()

        ssc.start()
        ssc.awaitTermination()
      }

    }
  • 相关阅读:
    [zoj3627]模拟吧
    [zoj3623]背包模型
    [hdu4358]树状数组
    [hdu1272]并查集
    [hdu3308]线段树
    [hdu5033]单调队列
    [hdu1506]单调队列(栈)
    [hdu2888]二维RMQ
    [hdu4123]dfs区间化+RMQ
    [hdu1242]优先队列
  • 原文地址:https://www.cnblogs.com/students/p/12034744.html
Copyright © 2011-2022 走看看