zoukankan      html  css  js  c++  java
  • 炼数成金hadoop视频干货05


     视频地址:http://pan.baidu.com/s/1dDEgKwD


    这一节由讲师和助教带着大家动手操作:部署一个简单的开发环境,并编写两个实例。


    开发环境的部署:http://www.cnblogs.com/admln/p/test-deployDevelopment.html


    第一个实例就是wordcount


    第二个实例

     1 package testHadoop;
     2 
     3 import java.io.IOException;
     4 
     5 import org.apache.hadoop.conf.Configuration;
     6 import org.apache.hadoop.conf.Configured;
     7 import org.apache.hadoop.fs.Path;
     8 import org.apache.hadoop.io.LongWritable;
     9 import org.apache.hadoop.io.Text;
    10 import org.apache.hadoop.mapred.TextOutputFormat;
    11 import org.apache.hadoop.mapreduce.Job;
    12 import org.apache.hadoop.mapreduce.Mapper;
    13 import org.apache.hadoop.mapreduce.Reducer;
    14 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    15 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    16 import org.apache.hadoop.util.Tool;
    17 import org.apache.hadoop.util.ToolRunner;
    18 
    19 @SuppressWarnings("deprecation")
    20 public class ReverseIndex extends Configured implements Tool{
    21     enum Counter{
    22         LINESKIP;
    23     }
    24     
    25     public static class Map extends Mapper<LongWritable,Text,Text,Text> {
    26         public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException {
    27             String line = value.toString();
    28             try {
    29                 String[] lineSplit = line.split(" ");
    30                 String anum = lineSplit[0];
    31                 String bnum = lineSplit[1];
    32                 
    33                 context.write(new Text(bnum), new Text(anum));
    34             }catch(java.lang.ArrayIndexOutOfBoundsException e) {
    35                 context.getCounter(Counter.LINESKIP).increment(1);
    36                 return;
    37             }
    38             
    39         }
    40     }
    41     public static class Reduce extends Reducer<Text,Text,Text,Text> {
    42         public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException {
    43             String valueString;
    44             String out = "";
    45             
    46             for(Text value:values) {
    47                 valueString = value.toString();
    48                 out += valueString+"|";
    49             }
    50             context.write(key, new Text(out));
    51         }
    52     }
    53     public int run(String[] args) throws Exception {
    54         Configuration conf = getConf();
    55         
    56         Job job = new Job(conf,"ReverseIndex");
    57         job.setJarByClass(ReverseIndex.class);
    58         
    59         FileInputFormat.addInputPath(job, new Path(args[0]));
    60         FileOutputFormat.setOutputPath(job, new Path(args[1]));
    61         
    62         job.setMapperClass(Map.class);
    63         job.setReducerClass(Reduce.class);
    64         //job.setOutputFormatClass(TextOutputFormat.class);
    65         job.setOutputKeyClass(Text.class);
    66         job.setOutputValueClass(Text.class);
    67         
    68         job.waitForCompletion(true);
    69         
    70         return job.isSuccessful()?0:1;
    71         
    72     }
    73     public static void main(String[] args) throws Exception {
    74         int res = ToolRunner.run(new Configuration(), new ReverseIndex(),args);
    75         System.exit(res);
    76     }
    77 }

    eclipse中运行没问题后打包用集群运行的时候遇到一个小问题

    原因是版本不合:我在Windows下编译时用的是JDK 7,而Linux上Hadoop使用的JDK是1.6。

    把源码在Linux上1.6编译一下就可以了。


    实践过程中还学到一个小知识:运行程序的命令中,输入输出路径如果写成 input output 这样的相对路径,默认指的就是HDFS上 /user/用户名/ 目录下的 input 和 output。

    如果写成 /input /output 这样的绝对路径,指的就是HDFS根目录下的 input 和 output。


    欲为大树,何与草争;心若不动,风又奈何。
  • 相关阅读:
    [20170612]FOR ALL COLUMNS SIZE repeat(11g).txt
    [20170612]FOR ALL COLUMNS SIZE repeat(12c).txt
    [20170611]关于数据块地址的计算.txt
    [20170607]再论Private Strand Flush Not Complete.txt
    [20170606]11G _optimizer_null_aware_antijoin.txt
    42_自定义泛型类的应用
    43_通过反射获得泛型的实际类型参数
    为什么好男孩找不到女朋友
    38_泛型的通配符扩展应用
    36_入门泛型的基本应用
  • 原文地址:https://www.cnblogs.com/admln/p/dataguru05.html
Copyright © 2011-2022 走看看