zoukankan      html  css  js  c++  java
  • spark 计算结果写入mysql 案例及常见问题解决

    package com.jxd

    import org.apache.spark.SparkContext
    import org.apache.spark.SparkConf
    import java.sql.Connection
    import java.sql.DriverManager
    object hello {

    /** Entry point: word-counts the text file "test/hello" and writes the
      * (word, count) pairs into MySQL, one JDBC connection per partition.
      */
    def main(args: Array[String]): Unit = {
      val conf = new SparkConf().setAppName("Hello World")
      val sc = new SparkContext(conf)
      val input = sc.textFile("test/hello", 2)
      val count = input.flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _)
      // foreachPartition runs insertToMysql on the executors, so each
      // partition opens exactly one connection instead of one per record.
      count.foreachPartition(insertToMysql)
      sc.stop()
    }

    /** Inserts every (name, num) pair of one partition into table t_spark.
      *
      * Uses a single Connection and a single reusable PreparedStatement for
      * the whole partition (the original leaked one unclosed statement per
      * record and never closed the connection); both are released in
      * finally blocks even if an insert fails.
      *
      * @param iterator the records of this partition as (word, count) pairs
      */
    def insertToMysql(iterator: Iterator[(String, Int)]): Unit = {
      val driver = "com.mysql.jdbc.Driver"
      val url = "jdbc:mysql://192.168.10.58:3306/test"
      val username = "root"
      val password = "1"
      Class.forName(driver)
      val connectionMqcrm: Connection = DriverManager.getConnection(url, username, password)
      try {
        val sql = "INSERT INTO t_spark (`name`,`num`) VALUES (?,?)"
        // One prepared statement, re-bound per record, instead of
        // re-preparing the same SQL for every element of the iterator.
        val statement = connectionMqcrm.prepareStatement(sql)
        try {
          iterator.foreach { case (name, num) =>
            statement.setString(1, name)
            statement.setInt(2, num)
            if (statement.executeUpdate() == 1) {
              println("写入mysql成功.............")
            }
          }
        } finally {
          statement.close()
        }
      } finally {
        connectionMqcrm.close()
      }
    }
    }

    Caused by: java.lang.ClassNotFoundException: com.mysql.jdbc.Driver
    at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
    at java.lang.Class.forName0(Native Method)
    at java.lang.Class.forName(Class.java:264)
    at com.jxd.hello$.insertToMysql(hello.scala:22)
    at com.jxd.hello$$anonfun$main$1.apply(hello.scala:13)
    at com.jxd.hello$$anonfun$main$1.apply(hello.scala:13)
    at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
    at org.apache.spark.rdd.RDD$$anonfun$foreachPartition$1$$anonfun$apply$29.apply(RDD.scala:926)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
    at org.apache.spark.SparkContext$$anonfun$runJob$5.apply(SparkContext.scala:2069)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
    at org.apache.spark.scheduler.Task.run(Task.scala:108)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:338)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)

    spark1.4以前版本 在/spark/jars 加入mysql驱动 并不起作用需在提交任务指定mysql驱动包

    例如:

    spark-submit --master spark://192.168.10.160:7077 --driver-class-path /usr/spark/jars/mysql-connector-java-5.1.18-bin.jar --class com.jxd.hello /usr/spark/wc.jar /usr/spark/test/hello

    高版本如 spark2.2已经修复此问题

    注意:集群中每一个server都得加入mysql驱动包(建议先加一个 然后采用远程复制)

    集群所有节点加完mysql驱动包后直接提交即可

    spark-submit --master spark://192.168.10.160:7077 --class com.jxd.hello /usr/spark/wc.jar /usr/spark/test/hello

  • 相关阅读:
    hadoop2.2编程:MRUnit测试
    TestLinkConverter编程纪要
    Redisson的分布式锁的简单实现
    Jmeter工具使用
    JVM的学习(三)
    Mybatis需要注意的地方及知识点
    JVM学习二(JAVA的四种引用)
    mysql的引擎
    JVM学习一(JVM的内存结构和垃圾回收)
    JDk1.8HashMap的源码分析
  • 原文地址:https://www.cnblogs.com/coderdxj/p/8073268.html
Copyright © 2011-2022 走看看