三个文件
2017-03-10 a
2017-03-11 b
2017-03-12 d
2017-03-13 d
2017-03-14
2017-03-15 a
2017-03-10 e
2017-03-11 b
2017-03-12 c
2017-03-13
2017-03-14 h
2017-03-15 a
2017-03-17 p
2017-03-10
2017-03-11 b
2017-03-12
2017-03-13 d
2017-03-14
2017-03-15 f
2017-03-16 o
import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class Deup{ public static class Map extends Mapper<Object, Text, Text, Text>{ @Override protected void map(Object key, Text value, Mapper<Object, Text, Text, Text>.Context context) throws IOException, InterruptedException { context.write(value, new Text("")); } } public static class Reduce extends Reducer<Text, Text, Text, Text>{ @Override protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException { context.write(key, new Text("")); } } public static void main(String[] args) throws Exception { Configuration conf=new Configuration(); @SuppressWarnings("deprecation") Job job=new Job(conf); job.setJarByClass(Deup.class); job.setMapperClass(Map.class); job.setCombinerClass(Reduce.class); job.setReducerClass(Reduce.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); System.exit(job.waitForCompletion(true)?0:1); } }
结果
2017-03-10
2017-03-10 a
2017-03-10 e
2017-03-11 b
2017-03-12
2017-03-12 c
2017-03-12 d
2017-03-13
2017-03-13 d
2017-03-14
2017-03-14 h
2017-03-15 a
2017-03-15 f
2017-03-16 o
2017-03-17 p