zoukankan      html  css  js  c++  java
  • hadoop本地运行wordcount报错解决,并成功运行

    Hadoop 中的Hello world 代码如下:

      1 package com.wordcount;
      2 
      3 import org.apache.hadoop.conf.Configuration;
      4 import org.apache.hadoop.conf.Configured;
      5 import org.apache.hadoop.fs.FileSystem;
      6 import org.apache.hadoop.fs.Path;
      7 import org.apache.hadoop.io.IntWritable;
      8 import org.apache.hadoop.io.LongWritable;
      9 import org.apache.hadoop.io.Text;
     10 import org.apache.hadoop.mapreduce.Job;
     11 import org.apache.hadoop.mapreduce.Mapper;
     12 import org.apache.hadoop.mapreduce.Reducer;
     13 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
     14 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
     15 import org.apache.hadoop.util.Tool;
     16 import org.apache.hadoop.util.ToolRunner;
     17 
     18 import java.io.IOException;
     19 
     20 /**
     21  * @program: hadoop_demo
     22  * @description:
     23  * @author: Mr.Walloce
     24  * @create: 2018/11/03 15:04
     25  **/
     26 public class WordCount extends Configured implements Tool {
     27 
     28     /**
     29      * <LongWritable, Text, Text, IntWritable> 输入和输出的key-value类型
     30      */
     31     static class MyMap extends Mapper<LongWritable, Text, Text, IntWritable> {
     32         //结果输出的字符串
     33         Text out_key = new Text();
     34 
     35         //结果输出的默认值
     36         IntWritable out_value = new IntWritable(1);
     37 
     38         /**
     39          * @param key     输入的字符串的偏移量
     40          * @param value   输入的字符串
     41          * @param context
     42          * @throws IOException
     43          * @throws InterruptedException
     44          */
     45         @Override
     46         protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
     47             System.out.println("map阶段开始执行,,,");
     48             String line = value.toString();
     49             long index = key.get();
     50             //对字符串进行处理,获取到单词
     51             String[] words = line.split(" ");
     52             if (words.length > 0) {
     53                 for (String word : words) {
     54                     out_key.set(word);
     55                     context.write(out_key, out_value);
     56                 }
     57             }
     58             System.out.println("map阶段结束。。。");
     59         }
     60     }
     61 
     62     /**
     63      * <Text, IntWritable, Text, IntWritable>输入和输出的key-value类型
     64      */
     65     static class MyReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
     66         private IntWritable result = new IntWritable();
     67 
     68         @Override
     69         protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
     70             System.out.println("Reduce阶段开始执行...");
     71             int sum = 0;
     72             for (IntWritable value : values) {
     73                 sum += value.get();
     74             }
     75             result.set(sum);
     76             System.out.println("单词" + key.toString() + ":  " + result.get());
     77             context.write(key, result);
     78             System.out.println("Reduce阶段结束。。。");
     79         }
     80     }
     81 
     82     static class MyCombiner extends Reducer<Text, IntWritable, Text, IntWritable> {
     83         private IntWritable result = new IntWritable();
     84 
     85         @Override
     86         protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
     87             System.out.println("Combiner阶段开始...");
     88             int sum = 0;
     89             for (IntWritable value : values) {
     90                 sum += value.get();
     91             }
     92             result.set(sum);
     93             context.write(key, result);
     94             System.out.println("Combiner阶段结束。。。");
     95         }
     96     }
     97 
     98     public int run(String[] args) throws Exception {
     99 
    100         //Hadoop的八股文
    101         Configuration conf = this.getConf();
    102         Job job = Job.getInstance(conf, this.getClass().getSimpleName());
    103 
    104         //************************对job进行具体的设置*************************
    105         //在集群中运行时不写会报错,本地运行科不写(最好写上)
    106         job.setJarByClass(WordCount.class);
    107 
    108         //设置输入输出路径
    109         Path in_path = new Path(args[0]);
    110         FileInputFormat.addInputPath(job, in_path);
    111         Path out_path = new Path(args[1]);
    112         FileOutputFormat.setOutputPath(job, out_path);
    113 
    114         //输出前判断输出路径是否存在,存在则删除(输出路径不能重复)
    115         FileSystem fs = out_path.getFileSystem(conf);
    116         if (fs.exists(out_path)) {
    117             fs.delete(out_path, true);
    118         }
    119 
    120         //运行map类相关的参数设置
    121         job.setMapperClass(MyMap.class);
    122         job.setMapOutputKeyClass(Text.class);
    123         job.setMapOutputValueClass(IntWritable.class);
    124 
    125         //运行Shuffle相关的参数设置
    126         job.setCombinerClass(MyCombiner.class);
    127 
    128         //设置reduce类相关的参数设置
    129         job.setReducerClass(MyReduce.class);
    130         job.setOutputKeyClass(Text.class);
    131         job.setOutputValueClass(IntWritable.class);
    132 
    133         //运行是否成功
    134         boolean isSuccess = job.waitForCompletion(true);
    135 
    136         //运行成功返回0,反之返回1
    137         return isSuccess ? 0 : 1;
    138     }
    139 
    140     public static void main(String args[]) {
    141         Configuration conf = new Configuration();
    142 
    143         args = new String[]{
    144                 "hdfs://walloce.one:8020/walloce/data/test.txt",
    145                 "hdfs://walloce.one:8020/walloce/output/"+ Math.random()
    146         };
    147 
    148         try {
    149             ToolRunner.run(conf, new WordCount(), args);
    150         } catch (Exception e) {
    151             e.printStackTrace();
    152         }
    153     }
    154 }

    运行异常代码:

    18/11/22 15:06:00 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
    18/11/22 15:06:02 INFO client.RMProxy: Connecting to ResourceManager at walloce.one/192.168.206.143:8032
    18/11/22 15:06:03 WARN mapreduce.JobSubmitter: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
    18/11/22 15:06:03 INFO input.FileInputFormat: Total input paths to process : 1
    18/11/22 15:06:03 INFO mapreduce.JobSubmitter: number of splits:1
    18/11/22 15:06:04 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1542897380554_0004
    18/11/22 15:06:04 INFO mapred.YARNRunner: Job jar is not present. Not adding any jar to the list of resources.
    18/11/22 15:06:04 INFO impl.YarnClientImpl: Submitted application application_1542897380554_0004
    18/11/22 15:06:04 INFO mapreduce.Job: The url to track the job: http://walloce.one:8088/proxy/application_1542897380554_0004/
    18/11/22 15:06:04 INFO mapreduce.Job: Running job: job_1542897380554_0004
    18/11/22 15:06:08 INFO mapreduce.Job: Job job_1542897380554_0004 running in uber mode : false
    18/11/22 15:06:08 INFO mapreduce.Job:  map 0% reduce 0%
    18/11/22 15:06:08 INFO mapreduce.Job: Job job_1542897380554_0004 failed with state FAILED due to: Application application_1542897380554_0004 failed 2 times due to AM Container for appattempt_1542897380554_0004_000002 exited with  exitCode: 1 due to: Exception from container-launch: ExitCodeException exitCode=1: /bin/bash: line 0: fg: no job control
    
    ExitCodeException exitCode=1: /bin/bash: line 0: fg: no job control
    
        at org.apache.hadoop.util.Shell.runCommand(Shell.java:538)
        at org.apache.hadoop.util.Shell.run(Shell.java:455)
        at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:702)
        at org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor.launchContainer(DefaultContainerExecutor.java:195)
        at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:300)
        at org.apache.hadoop.yarn.server.nodemanager.containermanager.launcher.ContainerLaunch.call(ContainerLaunch.java:81)
        at java.util.concurrent.FutureTask.run(FutureTask.java:262)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)
    
    
    Container exited with a non-zero exit code 1
    .Failing this attempt.. Failing the application.

    检查配置文件yarn-site.xml(注:mapreduce.framework.name 这个属性通常配置在 mapred-site.xml 中):

    1     <!-- 指定计算模型在yarn上 -->
    2     <property>
    3         <name>mapreduce.framework.name</name>
    4         <value>yarn</value>
    5     </property>

    上面的 "fg: no job control" 错误出现在从 Windows 客户端向 Linux 上的 YARN 提交作业时(容器启动命令跨平台不兼容)。由于这里是在本地(IDE 中)运行,所以计算模型要指定在本地:

    1     <!-- 指定计算模型在本地运行 -->
    2     <property>
    3         <name>mapreduce.framework.name</name>
    4         <value>local</value>
    5     </property>

    文件修改后,可以运行成功。。

    运行结果:

       

      1 "C:\Program Files\Java\jdk1.8.0_162\bin\java.exe" "-javaagent:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2018.1.6\lib\idea_rt.jar=13319:C:\Program Files\JetBrains\IntelliJ IDEA Community Edition 2018.1.6\bin" -Dfile.encoding=UTF-8 -classpath "C:\Program Files\Java\jdk1.8.0_162\jre\lib\charsets.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\deploy.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\access-bridge-64.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\cldrdata.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\dnsns.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\jaccess.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\jfxrt.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\localedata.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\nashorn.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunec.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunjce_provider.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunmscapi.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\sunpkcs11.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\ext\zipfs.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\javaws.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jce.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jfr.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jfxswt.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\jsse.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\management-agent.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\plugin.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\resources.jar;C:\Program Files\Java\jdk1.8.0_162\jre\lib\rt.jar;E:\IdeaWorkspace\hadoop_demo\target\classes;E:\repository3\org\apache\hadoop\hadoop-common\2.5.0\hadoop-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-annotations\2.5.0\hadoop-annotations-2.5.0.jar;C:\Program Files\Java\jdk1.8.0_162\lib\tools.jar;E:\repository3\com\google\guava\guava\11.0.2\guava-11.0.2.jar;E:\repository3\commons-cli\commons-cli\1.2\commons-cli-1.2.jar;E:\repository3\org\apache\commons\commons-math3\3.1.1\commons-math3-3.1.1.jar;E:\repository3\xmlenc\xmlenc\0.52\xmlenc-0.52.jar;E:\repository3\commons-httpclient\commons-httpclient\3.1\commons-httpclient-3.1.jar;E:\repository3\commons-codec\commons-codec\1.4\commons-codec-1.4.jar;E:\repository3\commons-io\commons-io\2.4\commons-io-2.4.jar;E:\repository3\commons-net\commons-net\3.1\commons-net-3.1.jar;E:\repository3\commons-collections\commons-collections\3.2.1\commons-collections-3.2.1.jar;E:\repository3\javax\servlet\servlet-api\2.5\servlet-api-2.5.jar;E:\repository3\org\mortbay\jetty\jetty\6.1.26\jetty-6.1.26.jar;E:\repository3\org\mortbay\jetty\jetty-util\6.1.26\jetty-util-6.1.26.jar;E:\repository3\com\sun\jersey\jersey-core\1.9\jersey-core-1.9.jar;E:\repository3\com\sun\jersey\jersey-json\1.9\jersey-json-1.9.jar;E:\repository3\org\codehaus\jettison\jettison\1.1\jettison-1.1.jar;E:\repository3\com\sun\xml\bind\jaxb-impl\2.2.3-1\jaxb-impl-2.2.3-1.jar;E:\repository3\javax\xml\bind\jaxb-api\2.2.2\jaxb-api-2.2.2.jar;E:\repository3\javax\xml\stream\stax-api\1.0-2\stax-api-1.0-2.jar;E:\repository3\javax\activation\activation\1.1\activation-1.1.jar;E:\repository3\org\codehaus\jackson\jackson-jaxrs\1.8.3\jackson-jaxrs-1.8.3.jar;E:\repository3\org\codehaus\jackson\jackson-xc\1.8.3\jackson-xc-1.8.3.jar;E:\repository3\com\sun\jersey\jersey-server\1.9\jersey-server-1.9.jar;E:\repository3\asm\asm\3.1\asm-3.1.jar;E:\repository3\tomcat\jasper-compiler\5.5.23\jasper-compiler-5.5.23.jar;E:\repository3\tomcat\jasper-runtime\5.5.23\jasper-runtime-5.5.23.jar;E:\repository3\javax\servlet\jsp\jsp-api\2.1\jsp-api-2.1.jar;E:\repository3\commons-el\commons-el\1.0\commons-el-1.0.jar;E:\repository3\commons-logging\commons-logging\1.1.3\commons-logging-1.1.3.jar;E:\repository3\log4j\log4j\1.2.17\log4j-1.2.17.jar;E:\repository3\net\java\dev\jets3t\jets3t\0.9.0\jets3t-0.9.0.jar;E:\repository3\org\apache\httpcomponents\httpclient\4.1.2\httpclient-4.1.2.jar;E:\repository3\org\apache\httpcomponents\httpcore\4.1.2\httpcore-4.1.2.jar;E:\repository3\com\jamesmurty\utils\java-xmlbuilder\0.4\java-xmlbuilder-0.4.jar;E:\repository3\commons-lang\commons-lang\2.6\commons-lang-2.6.jar;E:\repository3\commons-configuration\commons-configuration\1.6\commons-configuration-1.6.jar;E:\repository3\commons-digester\commons-digester\1.8\commons-digester-1.8.jar;E:\repository3\commons-beanutils\commons-beanutils\1.7.0\commons-beanutils-1.7.0.jar;E:\repository3\commons-beanutils\commons-beanutils-core\1.8.0\commons-beanutils-core-1.8.0.jar;E:\repository3\org\slf4j\slf4j-api\1.7.5\slf4j-api-1.7.5.jar;E:\repository3\org\slf4j\slf4j-log4j12\1.7.5\slf4j-log4j12-1.7.5.jar;E:\repository3\org\codehaus\jackson\jackson-core-asl\1.9.13\jackson-core-asl-1.9.13.jar;E:\repository3\org\codehaus\jackson\jackson-mapper-asl\1.9.13\jackson-mapper-asl-1.9.13.jar;E:\repository3\org\apache\avro\avro\1.7.4\avro-1.7.4.jar;E:\repository3\com\thoughtworks\paranamer\paranamer\2.3\paranamer-2.3.jar;E:\repository3\org\xerial\snappy\snappy-java\1.0.4.1\snappy-java-1.0.4.1.jar;E:\repository3\com\google\protobuf\protobuf-java\2.5.0\protobuf-java-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-auth\2.5.0\hadoop-auth-2.5.0.jar;E:\repository3\org\apache\directory\server\apacheds-kerberos-codec\2.0.0-M15\apacheds-kerberos-codec-2.0.0-M15.jar;E:\repository3\org\apache\directory\server\apacheds-i18n\2.0.0-M15\apacheds-i18n-2.0.0-M15.jar;E:\repository3\org\apache\directory\api\api-asn1-api\1.0.0-M20\api-asn1-api-1.0.0-M20.jar;E:\repository3\org\apache\directory\api\api-util\1.0.0-M20\api-util-1.0.0-M20.jar;E:\repository3\com\jcraft\jsch\0.1.42\jsch-0.1.42.jar;E:\repository3\com\google\code\findbugs\jsr305\1.3.9\jsr305-1.3.9.jar;E:\repository3\org\apache\commons\commons-compress\1.4.1\commons-compress-1.4.1.jar;E:\repository3\org\tukaani\xz\1.0\xz-1.0.jar;E:\repository3\org\apache\hadoop\hadoop-hdfs\2.5.0\hadoop-hdfs-2.5.0.jar;E:\repository3\commons-daemon\commons-daemon\1.0.13\commons-daemon-1.0.13.jar;E:\repository3\io\netty\netty\3.6.2.Final\netty-3.6.2.Final.jar;E:\repository3\org\apache\hadoop\hadoop-client\2.5.0\hadoop-client-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-app\2.5.0\hadoop-mapreduce-client-app-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-common\2.5.0\hadoop-mapreduce-client-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-client\2.5.0\hadoop-yarn-client-2.5.0.jar;E:\repository3\com\sun\jersey\jersey-client\1.9\jersey-client-1.9.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-server-common\2.5.0\hadoop-yarn-server-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-shuffle\2.5.0\hadoop-mapreduce-client-shuffle-2.5.0.jar;E:\repository3\org\fusesource\leveldbjni\leveldbjni-all\1.8\leveldbjni-all-1.8.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-api\2.5.0\hadoop-yarn-api-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-core\2.5.0\hadoop-mapreduce-client-core-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-yarn-common\2.5.0\hadoop-yarn-common-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-client-jobclient\2.5.0\hadoop-mapreduce-client-jobclient-2.5.0.jar;E:\repository3\org\apache\hadoop\hadoop-mapreduce-examples\2.5.0\hadoop-mapreduce-examples-2.5.0.jar;E:\repository3\org\apache\zookeeper\zookeeper\3.4.5\zookeeper-3.4.5.jar;E:\repository3\jline\jline\0.9.94\jline-0.9.94.jar;E:\repository3\org\jboss\netty\netty\3.2.2.Final\netty-3.2.2.Final.jar" com.wordcount.WordCount
      2 18/11/22 15:37:44 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
      3 18/11/22 15:37:47 INFO Configuration.deprecation: session.id is deprecated. Instead, use dfs.metrics.session-id
      4 18/11/22 15:37:47 INFO jvm.JvmMetrics: Initializing JVM Metrics with processName=JobTracker, sessionId=
      5 18/11/22 15:37:49 WARN mapreduce.JobSubmitter: No job jar file set.  User classes may not be found. See Job or Job#setJar(String).
      6 18/11/22 15:37:49 INFO input.FileInputFormat: Total input paths to process : 1
      7 18/11/22 15:37:50 INFO mapreduce.JobSubmitter: number of splits:1
      8 18/11/22 15:37:50 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_local857633983_0001
      9 18/11/22 15:37:50 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/staging/YinYichang857633983/.staging/job_local857633983_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval;  Ignoring.
     10 18/11/22 15:37:50 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/staging/YinYichang857633983/.staging/job_local857633983_0001/job.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts;  Ignoring.
     11 18/11/22 15:37:51 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/local/localRunner/YinYichang/job_local857633983_0001/job_local857633983_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval;  Ignoring.
     12 18/11/22 15:37:51 WARN conf.Configuration: file:/opt/module/hadoop-2.5.0/data/tmp/mapred/local/localRunner/YinYichang/job_local857633983_0001/job_local857633983_0001.xml:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts;  Ignoring.
     13 18/11/22 15:37:51 INFO mapreduce.Job: The url to track the job: http://localhost:8080/
     14 18/11/22 15:37:51 INFO mapreduce.Job: Running job: job_local857633983_0001
     15 18/11/22 15:37:51 INFO mapred.LocalJobRunner: OutputCommitter set in config null
     16 18/11/22 15:37:51 INFO mapred.LocalJobRunner: OutputCommitter is org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter
     17 18/11/22 15:37:51 INFO mapred.LocalJobRunner: Waiting for map tasks
     18 18/11/22 15:37:51 INFO mapred.LocalJobRunner: Starting task: attempt_local857633983_0001_m_000000_0
     19 18/11/22 15:37:51 INFO util.ProcfsBasedProcessTree: ProcfsBasedProcessTree currently is supported only on Linux.
     20 18/11/22 15:37:51 INFO mapred.Task:  Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@3a8763db
     21 18/11/22 15:37:51 INFO mapred.MapTask: Processing split: hdfs://walloce.one:8020/walloce/data/test.txt:0+173
     22 18/11/22 15:37:51 INFO mapred.MapTask: Map output collector class = org.apache.hadoop.mapred.MapTask$MapOutputBuffer
     23 18/11/22 15:37:51 INFO mapred.MapTask: (EQUATOR) 0 kvi 26214396(104857584)
     24 18/11/22 15:37:51 INFO mapred.MapTask: mapreduce.task.io.sort.mb: 100
     25 18/11/22 15:37:51 INFO mapred.MapTask: soft limit at 83886080
     26 18/11/22 15:37:51 INFO mapred.MapTask: bufstart = 0; bufvoid = 104857600
     27 18/11/22 15:37:51 INFO mapred.MapTask: kvstart = 26214396; length = 6553600
     28 map阶段开始执行,,,
     29 map阶段结束。。。
     30 map阶段开始执行,,,
     31 map阶段结束。。。
     32 map阶段开始执行,,,
     33 map阶段结束。。。
     34 map阶段开始执行,,,
     35 map阶段结束。。。
     36 map阶段开始执行,,,
     37 map阶段结束。。。
     38 map阶段开始执行,,,
     39 map阶段结束。。。
     40 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 
     41 18/11/22 15:37:52 INFO mapreduce.Job: Job job_local857633983_0001 running in uber mode : false
     42 18/11/22 15:37:52 INFO mapreduce.Job:  map 0% reduce 0%
     43 18/11/22 15:37:52 INFO mapred.MapTask: Starting flush of map output
     44 18/11/22 15:37:52 INFO mapred.MapTask: Spilling map output
     45 18/11/22 15:37:52 INFO mapred.MapTask: bufstart = 0; bufend = 321; bufvoid = 104857600
     46 18/11/22 15:37:52 INFO mapred.MapTask: kvstart = 26214396(104857584); kvend = 26214252(104857008); length = 145/6553600
     47 Combiner阶段开始...
     48 Combiner阶段结束。。。
     49 Combiner阶段开始...
     50 Combiner阶段结束。。。
     51 Combiner阶段开始...
     52 Combiner阶段结束。。。
     53 Combiner阶段开始...
     54 Combiner阶段结束。。。
     55 Combiner阶段开始...
     56 Combiner阶段结束。。。
     57 Combiner阶段开始...
     58 Combiner阶段结束。。。
     59 Combiner阶段开始...
     60 Combiner阶段结束。。。
     61 Combiner阶段开始...
     62 Combiner阶段结束。。。
     63 Combiner阶段开始...
     64 Combiner阶段结束。。。
     65 18/11/22 15:37:52 INFO mapred.MapTask: Finished spill 0
     66 18/11/22 15:37:52 INFO mapred.Task: Task:attempt_local857633983_0001_m_000000_0 is done. And is in the process of committing
     67 18/11/22 15:37:52 INFO mapred.LocalJobRunner: map
     68 18/11/22 15:37:52 INFO mapred.Task: Task 'attempt_local857633983_0001_m_000000_0' done.
     69 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Finishing task: attempt_local857633983_0001_m_000000_0
     70 18/11/22 15:37:52 INFO mapred.LocalJobRunner: map task executor complete.
     71 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Waiting for reduce tasks
     72 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Starting task: attempt_local857633983_0001_r_000000_0
     73 18/11/22 15:37:52 INFO util.ProcfsBasedProcessTree: ProcfsBasedProcessTree currently is supported only on Linux.
     74 18/11/22 15:37:52 INFO mapred.Task:  Using ResourceCalculatorProcessTree : org.apache.hadoop.yarn.util.WindowsBasedProcessTree@32eb8b1d
     75 18/11/22 15:37:52 INFO mapred.ReduceTask: Using ShuffleConsumerPlugin: org.apache.hadoop.mapreduce.task.reduce.Shuffle@5b831a05
     76 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: MergerManager: memoryLimit=1291845632, maxSingleShuffleLimit=322961408, mergeThreshold=852618176, ioSortFactor=10, memToMemMergeOutputsThreshold=10
     77 18/11/22 15:37:52 INFO reduce.EventFetcher: attempt_local857633983_0001_r_000000_0 Thread started: EventFetcher for fetching Map Completion Events
     78 18/11/22 15:37:52 INFO reduce.LocalFetcher: localfetcher#1 about to shuffle output of map attempt_local857633983_0001_m_000000_0 decomp: 101 len: 105 to MEMORY
     79 18/11/22 15:37:52 INFO reduce.InMemoryMapOutput: Read 101 bytes from map-output for attempt_local857633983_0001_m_000000_0
     80 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: closeInMemoryFile -> map-output of size: 101, inMemoryMapOutputs.size() -> 1, commitMemory -> 0, usedMemory ->101
     81 18/11/22 15:37:52 INFO reduce.EventFetcher: EventFetcher is interrupted.. Returning
     82 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 1 / 1 copied.
     83 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: finalMerge called with 1 in-memory map-outputs and 0 on-disk map-outputs
     84 18/11/22 15:37:52 INFO mapred.Merger: Merging 1 sorted segments
     85 18/11/22 15:37:52 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 97 bytes
     86 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: Merged 1 segments, 101 bytes to disk to satisfy reduce memory limit
     87 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: Merging 1 files, 105 bytes from disk
     88 18/11/22 15:37:52 INFO reduce.MergeManagerImpl: Merging 0 segments, 0 bytes from memory into reduce
     89 18/11/22 15:37:52 INFO mapred.Merger: Merging 1 sorted segments
     90 18/11/22 15:37:52 INFO mapred.Merger: Down to the last merge-pass, with 1 segments left of total size: 97 bytes
     91 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 1 / 1 copied.
     92 18/11/22 15:37:52 INFO Configuration.deprecation: mapred.skip.on is deprecated. Instead, use mapreduce.job.skiprecords
     93 Reduce阶段开始执行...
     94 单词I:  5
     95 Reduce阶段结束。。。
     96 Reduce阶段开始执行...
     97 单词am:  5
     98 Reduce阶段结束。。。
     99 Reduce阶段开始执行...
    100 单词an:  5
    101 Reduce阶段结束。。。
    102 Reduce阶段开始执行...
    103 单词for:  5
    104 Reduce阶段结束。。。
    105 Reduce阶段开始执行...
    106 单词hello:  1
    107 Reduce阶段结束。。。
    108 Reduce阶段开始执行...
    109 单词linux:  5
    110 Reduce阶段结束。。。
    111 Reduce阶段开始执行...
    112 单词new:  5
    113 Reduce阶段结束。。。
    114 Reduce阶段开始执行...
    115 单词programer:  5
    116 Reduce阶段结束。。。
    117 Reduce阶段开始执行...
    118 单词world!:  1
    119 Reduce阶段结束。。。
    120 18/11/22 15:37:52 INFO mapred.Task: Task:attempt_local857633983_0001_r_000000_0 is done. And is in the process of committing
    121 18/11/22 15:37:52 INFO mapred.LocalJobRunner: 1 / 1 copied.
    122 18/11/22 15:37:52 INFO mapred.Task: Task attempt_local857633983_0001_r_000000_0 is allowed to commit now
    123 18/11/22 15:37:52 INFO output.FileOutputCommitter: Saved output of task 'attempt_local857633983_0001_r_000000_0' to hdfs://walloce.one:8020/walloce/output/_temporary/0/task_local857633983_0001_r_000000
    124 18/11/22 15:37:52 INFO mapred.LocalJobRunner: reduce > reduce
    125 18/11/22 15:37:52 INFO mapred.Task: Task 'attempt_local857633983_0001_r_000000_0' done.
    126 18/11/22 15:37:52 INFO mapred.LocalJobRunner: Finishing task: attempt_local857633983_0001_r_000000_0
    127 18/11/22 15:37:52 INFO mapred.LocalJobRunner: reduce task executor complete.
    128 18/11/22 15:37:53 INFO mapreduce.Job:  map 100% reduce 100%
    129 18/11/22 15:37:53 INFO mapreduce.Job: Job job_local857633983_0001 completed successfully
    130 18/11/22 15:37:53 INFO mapreduce.Job: Counters: 38
    131     File System Counters
    132         FILE: Number of bytes read=576
    133         FILE: Number of bytes written=490527
    134         FILE: Number of read operations=0
    135         FILE: Number of large read operations=0
    136         FILE: Number of write operations=0
    137         HDFS: Number of bytes read=346
    138         HDFS: Number of bytes written=63
    139         HDFS: Number of read operations=15
    140         HDFS: Number of large read operations=0
    141         HDFS: Number of write operations=6
    142     Map-Reduce Framework
    143         Map input records=6
    144         Map output records=37
    145         Map output bytes=321
    146         Map output materialized bytes=105
    147         Input split bytes=110
    148         Combine input records=37
    149         Combine output records=9
    150         Reduce input groups=9
    151         Reduce shuffle bytes=105
    152         Reduce input records=9
    153         Reduce output records=9
    154         Spilled Records=18
    155         Shuffled Maps =1
    156         Failed Shuffles=0
    157         Merged Map outputs=1
    158         GC time elapsed (ms)=3
    159         CPU time spent (ms)=0
    160         Physical memory (bytes) snapshot=0
    161         Virtual memory (bytes) snapshot=0
    162         Total committed heap usage (bytes)=372244480
    163     Shuffle Errors
    164         BAD_ID=0
    165         CONNECTION=0
    166         IO_ERROR=0
    167         WRONG_LENGTH=0
    168         WRONG_MAP=0
    169         WRONG_REDUCE=0
    170     File Input Format Counters 
    171         Bytes Read=173
    172     File Output Format Counters 
    173         Bytes Written=63
    174 
    175 Process finished with exit code 0

    由wordcount运行结果可以看出,MapReduce的执行顺序:

      1、Map阶段开始

        因为map阶段输入的数据是以行为单位,key 是该行起始位置在文件中的字节偏移量(不是行序号),有多少行map就执行多少次。

      2、Combiner阶段开始

        map 函数处理完所有行之后,Map 端在把输出溢写(spill)到磁盘时执行 Combiner(此时 Map 任务尚未结束,日志中 Combiner 的输出位于 "Spilling map output" 与 "Finished spill 0" 之间)。Combiner阶段进行的是每个分区里的数据小聚合,有多少key进行多少次。

      3、Reduce阶段开始

        Combiner阶段结束后,Reduce阶段需要将所有分区的所有数据进行聚合,得出最终的结果。

    记: 心酸的学习历程!

    初心回归,时光已逝!
  • 相关阅读:
    HDU 1068
    hdu6447
    HDU 6438
    网络赛的个人反思总结
    Bellman-ford 模板
    Pairs Forming LCM LightOJ
    POJ
    链式前向星
    POJ 3281 Dining
    游标遍历所有数据库循环执行修改数据库的sql命令
  • 原文地址:https://www.cnblogs.com/yin1361866686/p/10001299.html
Copyright © 2011-2022 走看看