zoukankan      html  css  js  c++  java
  • Hadoop on Mac with IntelliJ IDEA

    在简化陆嘉恒《Hadoop实战(第2版)》5.4节"单表关联"的代码时,遇到了空指针异常(NullPointerException)。经分析是代码逻辑问题,在此做个记录。

    环境:Mac OS X 10.9.5, IntelliJ IDEA 13.1.5, Hadoop 1.2.1

    简化后的代码如下,运行时在 reduce 阶段遇到了 NullPointerException。

     1 public class STjoinEx { // driver plus Map/Reduce classes for a single-table self-join job
     2     private static final String TIMES = "TIMES"; // configuration key used as a "header not yet written" flag
     3 
     4     public static void main(String[] args) throws Exception {
     5         Configuration configuration = new Configuration();
     6         configuration.setInt(TIMES, 1); // 1 means the reducer has not written the header row yet
     7         String[] remainingArgs = new GenericOptionsParser(configuration, args).getRemainingArgs(); // arguments left after generic option parsing
     8         if (remainingArgs.length != 2) { // exactly <input> and <output> are required
     9             System.err.println("STjoinEx <input> <output>");
    10             System.exit(2); // usage error
    11         }
    12 
    13         Job job = new Job(configuration, STjoinEx.class.getSimpleName()); // job is named after this class
    14         job.setJarByClass(STjoinEx.class);
    15         job.setMapperClass(Map.class);
    16         job.setReducerClass(Reduce.class);
    17         job.setInputFormatClass(KeyValueTextInputFormat.class); // NOTE(review): presumably splits each line into key/value at the first tab - confirm separator
    18         job.setOutputFormatClass(TextOutputFormat.class);
    19         job.setOutputKeyClass(Text.class);
    20         job.setOutputValueClass(Text.class);
    21 
    22         FileInputFormat.setInputPaths(job, new Path(remainingArgs[0]));
    23         FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));
    24 
    25         System.exit(job.waitForCompletion(true) ? 0 : 1); // exit code 0 on success, 1 on failure
    26 
    27     }
    28 
    29     public static class Map extends Mapper<Text, Text, Text, Text> { // emits every input pair twice: once under its key, once under its value
    30         final static Text LEFT_TABLE = new Text(); // reusable output buffer for the "1 "-tagged record
    31         final static Text RIGHT_TABLE = new Text(); // reusable output buffer for the "2 "-tagged record
    32 
    33         @Override
    34         protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
    35             // left table
    36             LEFT_TABLE.set("1 " + value); // tag '1': the reducer collects these into grandParent
    37             context.write(key, LEFT_TABLE);
    38             // right table
    39             RIGHT_TABLE.set("2 " + key); // tag '2': the reducer collects these into grandChild
    40             context.write(value, RIGHT_TABLE);
    41         }
    42     }
    43 
    44     public static class Reduce extends Reducer<Text, Text, Text, Text> { // per key, writes every (grandChild, grandParent) combination of the tagged values
    45         private static final int INDENT = 2; // NOTE(review): not referenced in the visible code; substring(2) below is hard-coded
    46         private static final Text GRAND_PARENT = new Text(); // reusable output value
    47         private static final Text GRAND_CHILD = new Text(); // reusable output key
    48 
    49         @Override
    50         protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    51             // output header
    52             int times = context.getConfiguration().getInt(TIMES, 1); // falls back to 1 when the key is unset
    53             if (times == 1) {
    54                 context.write(new Text("grandChild"), new Text("grandParent"));
    55                 context.getConfiguration().setInt(TIMES, ++times); // stores 2 so the header branch is skipped next time; NOTE(review): presumably only visible within this task - confirm
    56             }
    57 
    58             // prepare matrix
    59             int headChar = 0; // first character of the current value, i.e. the mapper's tag
    60             String[] grandChild = new String[10]; // NOTE(review): fixed capacity of 10; more entries would overflow the array
    61             String[] grandParent = new String[10]; // NOTE(review): same fixed capacity of 10
    62             int grandChildNum = 0; // number of filled slots in grandChild
    63             int grandParentNum = 0; // number of filled slots in grandParent
    64 
    65             for (Text value : values) {
    66                 headChar = value.charAt(0);
    67                 if (headChar == '1') {
    68                     grandParent[grandParentNum] = value.toString().substring(2); // drop the tag and the space after it
    69                     grandParentNum++;
    70                 } else { // any tag other than '1' is treated as the grandChild side
    71                     grandChild[grandChildNum] = value.toString().substring(2); // drop the tag and the space after it
    72                     grandChildNum++;
    73                 }
    74             }
    75 
    76             // multiply
    77             if (grandChildNum != 0 && grandChildNum != 0) { // NOTE(review): both operands test grandChildNum; the second was presumably meant to be grandParentNum
    78                 for (int i = 0; i < grandChildNum; i++) {
    79                     GRAND_CHILD.set(grandChild[i]);
    80                     for (int j = 0; j < grandParentNum; j++) { // cross product: one output row per (grandChild, grandParent) pair
    81                         GRAND_PARENT.set(grandParent[j]);
    82                         context.write(GRAND_CHILD, GRAND_PARENT);
    83                     }
    84                 }
    85             }
    86         }
    87     }
    88 }

    执行输出为

     1 14/10/07 11:12:51 INFO mapred.JobClient:  map 0% reduce 0%
     2 14/10/07 11:12:54 INFO mapred.JobClient:  map 100% reduce 0%
     3 14/10/07 11:13:01 INFO mapred.JobClient:  map 100% reduce 33%
     4 14/10/07 11:13:04 INFO mapred.JobClient: Task Id : attempt_201410021756_0048_r_000000_0, Status : FAILED
     5 java.lang.NullPointerException
     6     at org.apache.hadoop.io.Text.encode(Text.java:388)
     7     at org.apache.hadoop.io.Text.set(Text.java:178)
     8     at main.ch5.STjoinEx$Reduce.reduce(STjoinEx.java:96)
     9     at main.ch5.STjoinEx$Reduce.reduce(STjoinEx.java:61)
    10     at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:177)
    11     at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:649)
    12     at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:418)
    13     at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    14     at java.security.AccessController.doPrivileged(Native Method)
    15     at javax.security.auth.Subject.doAs(Subject.java:396)
    16     at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1190)
    17     at org.apache.hadoop.mapred.Child.main(Child.java:249)

    从输出信息可以发现,出错行是源码第96行的 if (grandChildNum != 0 && grandChildNum != 0)。两个判断条件重复了,将其中一个改成 grandParentNum != 0 即可。

    修正后的执行结果如下:

     1 grandChild    grandParent
     2 Jone    Alice
     3 Jone    Jesse
     4 Tom    Alice
     5 Tom    Jesse
     6 Tom    Mary
     7 Tom    Ben
     8 Jone    Mary
     9 Jone    Ben
    10 Philip    Alice
    11 Philip    Jesse
    12 Mark    Alice
    13 Mark    Jesse
  • 相关阅读:
    大数据学习笔记之一:大数据初识
    从漏洞中总结编程规范(转发)+自我补充
    软件性能测试的基本概念和计算公式(转发)
    系统吞吐量、TPS(QPS)、用户并发量、性能测试概念和公式(转发)
    Linux学习记录(三):time 相关
    Linux报错第一弹: /bin/sh^M: bad interpreter: No such file or directory
    Linux学习记录(二)----if
    SVN 提交出错1
    java.lang.NoClassDefFoundError
    git 将文件取消版本控制
  • 原文地址:https://www.cnblogs.com/michaellfx/p/4009214.html
Copyright © 2011-2022 走看看