  • Spark in IntelliJ IDEA on Windows throws: java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.

    Source code:

    import org.apache.spark.SparkContext
    import org.apache.spark.SparkConf
    
    object WordCount {
      def main(args: Array[String]) {
        val inputFile =  "C://scalatext/hello.txt"
        val conf = new SparkConf().setAppName("WordCount")
        conf.setMaster("local")
        val sc = new SparkContext(conf)
        val textFile = sc.textFile(inputFile)
        val wordCount = textFile.flatMap(line => line.split(" ")).map(word => (word, 1)).reduceByKey((a, b) => a + b)
        //wordCount.foreach(println)
        wordCount.saveAsTextFile("C://scalatext/out")
      }
    }

    Problem description:

    The error appears whenever the job writes output to the local filesystem (e.g. wordCount.saveAsTextFile("C://scalatext/out")); merely printing the results (e.g. wordCount.foreach(println)) runs without error. The reason is that saveAsTextFile goes through Hadoop's local filesystem code, which on Windows shells out to winutils.exe to set file permissions, while foreach(println) never touches that code path.

    The error output is as follows:

    20/03/31 22:57:53 ERROR Shell: Failed to locate the winutils binary in the hadoop binary path
    java.io.IOException: Could not locate executable null\bin\winutils.exe in the Hadoop binaries.
    	at org.apache.hadoop.util.Shell.getQualifiedBinPath(Shell.java:278)
    	at org.apache.hadoop.util.Shell.getWinUtilsPath(Shell.java:300)
    	at org.apache.hadoop.util.Shell.<clinit>(Shell.java:293)
    	at org.apache.hadoop.util.StringUtils.<clinit>(StringUtils.java:76)
    	at org.apache.hadoop.mapred.FileInputFormat.setInputPaths(FileInputFormat.java:362)
    	at org.apache.spark.SparkContext$$anonfun$hadoopFile$1$$anonfun$29.apply(SparkContext.scala:1013)
    	at org.apache.spark.SparkContext$$anonfun$hadoopFile$1$$anonfun$29.apply(SparkContext.scala:1013)
    	at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$6.apply(HadoopRDD.scala:179)
    	at org.apache.spark.rdd.HadoopRDD$$anonfun$getJobConf$6.apply(HadoopRDD.scala:179)
    	at scala.Option.foreach(Option.scala:257)
    	at org.apache.spark.rdd.HadoopRDD.getJobConf(HadoopRDD.scala:179)
    	at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:198)
    	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    	at scala.Option.getOrElse(Option.scala:121)
    	at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    	at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    	at scala.Option.getOrElse(Option.scala:121)
    	at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    	at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    	at scala.Option.getOrElse(Option.scala:121)
    	at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    	at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    	at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    	at scala.Option.getOrElse(Option.scala:121)
    	at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    	at org.apache.spark.Partitioner$$anonfun$defaultPartitioner$2.apply(Partitioner.scala:66)
    	at org.apache.spark.Partitioner$$anonfun$defaultPartitioner$2.apply(Partitioner.scala:66)
    	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    	at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    	at scala.collection.immutable.List.foreach(List.scala:381)
    	at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    	at scala.collection.immutable.List.map(List.scala:285)
    	at org.apache.spark.Partitioner$.defaultPartitioner(Partitioner.scala:66)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$reduceByKey$3.apply(PairRDDFunctions.scala:331)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$reduceByKey$3.apply(PairRDDFunctions.scala:331)
    	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    	at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    	at org.apache.spark.rdd.PairRDDFunctions.reduceByKey(PairRDDFunctions.scala:330)
    	at WordCount$.main(WordCount.scala:13)
    	at WordCount.main(WordCount.scala)
    20/03/31 22:57:54 ERROR Executor: Exception in task 0.0 in stage 1.0 (TID 1)
    java.lang.NullPointerException
    	at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
    	at org.apache.hadoop.util.Shell.runCommand(Shell.java:404)
    	at org.apache.hadoop.util.Shell.run(Shell.java:379)
    	at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589)
    	at org.apache.hadoop.util.Shell.execCommand(Shell.java:678)
    	at org.apache.hadoop.util.Shell.execCommand(Shell.java:661)
    	at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:639)
    	at org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:468)
    	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:456)
    	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
    	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:905)
    	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:798)
    	at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
    	at org.apache.spark.SparkHadoopWriter.open(SparkHadoopWriter.scala:90)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1206)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1197)
    	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
    	at org.apache.spark.scheduler.Task.run(Task.scala:99)
    	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
    	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    	at java.lang.Thread.run(Thread.java:748)
    20/03/31 22:57:54 ERROR TaskSetManager: Task 0 in stage 1.0 failed 1 times; aborting job
    Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, localhost, executor driver): java.lang.NullPointerException
    	at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
    	at org.apache.hadoop.util.Shell.runCommand(Shell.java:404)
    	at org.apache.hadoop.util.Shell.run(Shell.java:379)
    	at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589)
    	at org.apache.hadoop.util.Shell.execCommand(Shell.java:678)
    	at org.apache.hadoop.util.Shell.execCommand(Shell.java:661)
    	at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:639)
    	at org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:468)
    	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:456)
    	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
    	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:905)
    	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:798)
    	at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
    	at org.apache.spark.SparkHadoopWriter.open(SparkHadoopWriter.scala:90)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1206)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1197)
    	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
    	at org.apache.spark.scheduler.Task.run(Task.scala:99)
    	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
    	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    	at java.lang.Thread.run(Thread.java:748)
    
    Driver stacktrace:
    	at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
    	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
    	at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
    	at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    	at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    	at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
    	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
    	at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
    	at scala.Option.foreach(Option.scala:257)
    	at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
    	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
    	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
    	at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
    	at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    	at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
    	at org.apache.spark.SparkContext.runJob(SparkContext.scala:1925)
    	at org.apache.spark.SparkContext.runJob(SparkContext.scala:1938)
    	at org.apache.spark.SparkContext.runJob(SparkContext.scala:1958)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1226)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1168)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1168)
    	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    	at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    	at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1168)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:1071)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1037)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1037)
    	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    	at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    	at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1037)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply$mcV$sp(PairRDDFunctions.scala:963)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:963)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:963)
    	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    	at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    	at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:962)
    	at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply$mcV$sp(RDD.scala:1489)
    	at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1468)
    	at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1468)
    	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    	at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    	at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    	at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1468)
    	at WordCount$.main(WordCount.scala:15)
    	at WordCount.main(WordCount.scala)
    Caused by: java.lang.NullPointerException
    	at java.lang.ProcessBuilder.start(ProcessBuilder.java:1012)
    	at org.apache.hadoop.util.Shell.runCommand(Shell.java:404)
    	at org.apache.hadoop.util.Shell.run(Shell.java:379)
    	at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:589)
    	at org.apache.hadoop.util.Shell.execCommand(Shell.java:678)
    	at org.apache.hadoop.util.Shell.execCommand(Shell.java:661)
    	at org.apache.hadoop.fs.RawLocalFileSystem.setPermission(RawLocalFileSystem.java:639)
    	at org.apache.hadoop.fs.FilterFileSystem.setPermission(FilterFileSystem.java:468)
    	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:456)
    	at org.apache.hadoop.fs.ChecksumFileSystem.create(ChecksumFileSystem.java:424)
    	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:905)
    	at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:798)
    	at org.apache.hadoop.mapred.TextOutputFormat.getRecordWriter(TextOutputFormat.java:123)
    	at org.apache.spark.SparkHadoopWriter.open(SparkHadoopWriter.scala:90)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1206)
    	at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1$$anonfun$13.apply(PairRDDFunctions.scala:1197)
    	at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
    	at org.apache.spark.scheduler.Task.run(Task.scala:99)
    	at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
    	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    	at java.lang.Thread.run(Thread.java:748)
    
    Process finished with exit code 1
    

      

    Solution:

    Download https://github.com/srccodes/hadoop-common-2.2.0-bin and extract it.

    Point the HADOOP_HOME environment variable at the extracted directory, e.g. HADOOP_HOME=C:/hadoop-common-2.2.0-bin-master.
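
    As a quick sanity check (an illustrative snippet, not part of the original fix), you can verify from Scala that HADOOP_HOME is visible to the JVM and that bin\winutils.exe actually exists before launching the job:

    // Illustrative check only: confirms HADOOP_HOME (or hadoop.home.dir) is set
    // and that winutils.exe sits where Hadoop expects it, i.e. under <home>\bin.
    object CheckWinutils {
      def main(args: Array[String]): Unit = {
        val home = sys.env.getOrElse("HADOOP_HOME", sys.props.getOrElse("hadoop.home.dir", ""))
        if (home.isEmpty) {
          println("HADOOP_HOME / hadoop.home.dir is not set")
        } else {
          val winutils = new java.io.File(home, "bin/winutils.exe")
          println(s"Looking for ${winutils.getAbsolutePath}: exists = ${winutils.exists}")
        }
      }
    }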

    If you prefer not to configure the environment variable, you can instead add the following statement at the beginning of the main method:

    System.setProperty("hadoop.home.dir","C://hadoop-common-2.2.0-bin-master")
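
    Putting it together, here is a minimal sketch of the same WordCount program with the property set as the first statement of main; adjust the path to wherever you extracted the archive:

    import org.apache.spark.{SparkConf, SparkContext}

    object WordCount {
      def main(args: Array[String]): Unit = {
        // Must run before anything touches Hadoop classes; otherwise Shell's
        // static initializer has already looked for winutils.exe and failed.
        System.setProperty("hadoop.home.dir", "C://hadoop-common-2.2.0-bin-master")

        val conf = new SparkConf().setAppName("WordCount").setMaster("local")
        val sc = new SparkContext(conf)

        val textFile = sc.textFile("C://scalatext/hello.txt")
        val wordCount = textFile
          .flatMap(line => line.split(" "))
          .map(word => (word, 1))
          .reduceByKey(_ + _)

        wordCount.saveAsTextFile("C://scalatext/out")
        sc.stop()
      }
    }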
    

      

  • Original post: https://www.cnblogs.com/Alcesttt/p/12609463.html