  • MapReduce Examples

    Data deduplication: the mapper emits every input line as a key with an empty value; the shuffle groups identical lines under one key, and the reducer writes each key exactly once, so duplicates disappear (a sample run follows the code).

    import java.io.IOException;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class Dedup {
        public static class Map extends Mapper<Object, Text, Text, Text> {
            // Emit each input line as the key; the value carries no information.
            @Override
            public void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                context.write(value, new Text(""));
            }
        }

        public static class Reduce extends Reducer<Text, Text, Text, Text> {
            // Identical lines are grouped under one key by the shuffle;
            // writing each key once discards the duplicates.
            @Override
            public void reduce(Text key, Iterable<Text> values, Context context)
                    throws IOException, InterruptedException {
                context.write(key, new Text(""));
            }
        }

        public static void main(String[] args) throws Exception {
            if (args.length != 2) {
                System.err.println("Usage: Dedup <input path> <output path>");
                System.exit(-1);
            }
            Job job = Job.getInstance();
            job.setJobName("Dedup");
            job.setJarByClass(Dedup.class);
            job.setMapperClass(Map.class);
            job.setCombinerClass(Reduce.class); // the Reduce class above, not the Reducer base class
            job.setReducerClass(Reduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
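    For illustration, here is a hypothetical run (the jar name and data are made up; only the duplicate-collapsing behavior is the point). Each repeated input line becomes the same map output key, so it appears only once in the result:

        $ hadoop jar dedup.jar Dedup /user/hadoop/dedup_in /user/hadoop/dedup_out

        input:            output:
        2012-3-1 a        2012-3-1 a
        2012-3-2 b        2012-3-2 b
        2012-3-1 a        2012-3-3 c
        2012-3-3 c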

    Sorting: the mapper emits each number as an IntWritable key; MapReduce sorts keys during the shuffle, so the reducer only has to prefix each value with its rank (a sample run follows the code).

    import java.io.IOException;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class Sort {
        public static class Map extends Mapper<Object, Text, IntWritable, IntWritable> {
            private IntWritable data = new IntWritable();

            // Emit each number as the key; the framework sorts keys during the shuffle.
            @Override
            public void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                data.set(Integer.parseInt(value.toString().trim()));
                context.write(data, new IntWritable(1));
            }
        }

        public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
            private IntWritable linenum = new IntWritable(1);

            // Keys arrive in ascending order; prefix each value with its rank.
            // Iterating over values keeps duplicates: one output line per occurrence.
            @Override
            public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                    throws IOException, InterruptedException {
                for (IntWritable value : values) {
                    context.write(linenum, key);
                    linenum.set(linenum.get() + 1);
                }
            }
        }

        public static void main(String[] args) throws Exception {
            if (args.length != 2) {
                System.err.println("Usage: Sort <input path> <output path>");
                System.exit(-1);
            }
            Job job = Job.getInstance();
            job.setJobName("Sort");
            job.setJarByClass(Sort.class);

            job.setMapperClass(Map.class);
            job.setReducerClass(Reduce.class); // the Reduce class above, not the Reducer base class
            job.setNumReduceTasks(1);          // a single reducer keeps the ordering global

            job.setOutputKeyClass(IntWritable.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
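    As a quick sanity check with made-up data (one integer per line), the output pairs each rank with a value in ascending order; the ordering comes from the shuffle itself, the reducer only numbers the values:

        input:        output:
        32            1    5
        5             2    32
        890           3    654
        654           4    890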

    Computing an average: the mapper emits one (name, score) pair per input line; the reducer sums and counts the scores grouped under each name and writes the integer average (a worked example follows the code).

    import java.io.IOException;
    import java.util.StringTokenizer;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class Score {
        public static class Map extends Mapper<Object, Text, Text, IntWritable> {
            // Each line holds "<name> <score>"; emit one (name, score) pair per line.
            // TextInputFormat already delivers single lines, so the outer "\n"
            // tokenizer is defensive.
            @Override
            public void map(Object key, Text value, Context context)
                    throws IOException, InterruptedException {
                StringTokenizer lines = new StringTokenizer(value.toString(), "\n");
                while (lines.hasMoreTokens()) {
                    StringTokenizer tokenizerLine = new StringTokenizer(lines.nextToken());
                    String strName = tokenizerLine.nextToken();
                    String strScore = tokenizerLine.nextToken();
                    context.write(new Text(strName), new IntWritable(Integer.parseInt(strScore)));
                }
            }
        }

        public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
            // All scores for one name arrive together; sum and count them,
            // then emit the integer average.
            @Override
            public void reduce(Text key, Iterable<IntWritable> values, Context context)
                    throws IOException, InterruptedException {
                int sum = 0;
                int count = 0;
                for (IntWritable value : values) {
                    sum += value.get();
                    count++;
                }
                context.write(key, new IntWritable(sum / count));
            }
        }

        public static void main(String[] args) throws Exception {
            if (args.length != 2) {
                System.err.println("Usage: Score <input path> <output path>");
                System.exit(-1);
            }
            Job job = Job.getInstance();
            job.setJobName("Score");
            job.setJarByClass(Score.class);

            job.setMapperClass(Map.class);
            job.setReducerClass(Reduce.class); // the Reduce class above, not the Reducer base class
            // no combiner here: an average of partial averages is not the overall average

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }
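    A worked example with hypothetical scores shows what the reducer computes: each name's scores are summed and divided (integer division) by their count:

        input:         output:
        zhang 88       li    90
        wang 79        wang  82
        li 90          zhang 85
        zhang 82
        wang 85

    Check: zhang (88+82)/2 = 85, wang (79+85)/2 = 82, li 90/1 = 90.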
  • Original post: https://www.cnblogs.com/liutoutou/p/3361362.html