  • Big-data WordCount code. To really understand the code, read it alongside a diagram of the MapReduce data flow.
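
    A rough plain-text sketch of the standard MapReduce pipeline the code follows:

        input splits → map: (offset, line) → (word, 1) pairs → shuffle & sort: group by word → reduce: (word, [1, 1, ...]) → (word, count) → output files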

    package cn.itcast.hadoop.mr;

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class WordCountDriver {

        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            //conf.set("mapreduce.app-submission.cross-platform", "true");  // cross-platform flag: lets the MR job be submitted from Windows
            Job job = Job.getInstance(conf, "word count");
            job.setJarByClass(WordCountDriver.class);
            job.setMapperClass(WordCountMapper.class);
            job.setReducerClass(WordCountReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path("/home/node-1/zhouriyue/input/"));
            FileOutputFormat.setOutputPath(job, new Path("/home/node-1/zhouriyue/output/"));
            /*FileInputFormat.setInputPaths(job, "/wordcount/input");
            FileOutputFormat.setOutputPath(job, new Path("/wordcount/output"));*/
            boolean b = job.waitForCompletion(true);
            System.exit(b ? 0 : 1);
        }
    }
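
    A pitfall when re-running the job: MapReduce refuses to start if the output directory already exists and fails with a FileAlreadyExistsException. A minimal guard that could go in main() before submitting the job (a sketch using the standard HDFS FileSystem API; this block is not part of the original code):

        // Hypothetical guard, not in the original driver. Requires:
        // import org.apache.hadoop.fs.FileSystem;
        FileSystem fs = FileSystem.get(conf);
        Path outputPath = new Path("/home/node-1/zhouriyue/output/");
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true); // true = delete recursively
        }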

    package cn.itcast.hadoop.mr;

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Called once per input line; the key is the line's byte offset in the file.
            String line = value.toString();
            String[] words = line.split(" ");
            // Emit (word, 1) for every word on the line.
            for (String word : words) {
                System.out.println(word + "," + 1);
                context.write(new Text(word), new IntWritable(1));
            }
        }
    }
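
    To make the mapper concrete: for one hypothetical input line (an illustration, not output from an actual run), a single map() call produces:

        // Input line:  "hello world hello"
        // map() emits: (hello, 1), (world, 1), (hello, 1)
        // After the shuffle/sort phase, the reducer receives:
        //   (hello, [1, 1]) and (world, [1])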

    package cn.itcast.hadoop.mr;

    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // All counts for the same word arrive grouped together; sum them.
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            System.out.println(key + "," + count);
            context.write(key, new IntWritable(count));
        }
    }
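
    Because the reduce step just sums, and summation is associative and commutative, the same class can double as a combiner that pre-aggregates counts on the map side and cuts shuffle traffic. This is an optional one-line addition to the driver, not part of the original code:

        job.setCombinerClass(WordCountReducer.class);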

    Learn one thing and infer the rest: borrowing from others' code to write your own is what really counts.

    Problem: find the maximum of all the numbers in the files 4.txt, 5.txt, and 6.txt. The code is as follows.

    package com.gxuwz.MaxValue;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;

    public class MaxValueDriver {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);
            job.setJarByClass(MaxValueDriver.class);
            job.setMapperClass(MaxValueMapper.class);
            job.setReducerClass(MaxValueReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.setInputPaths(job, "/home/node-1/zhouriyue/data/");
            FileOutputFormat.setOutputPath(job, new Path("/home/node-1/zhouriyue/maxvalue/"));
            boolean b = job.waitForCompletion(true);
            System.exit(b ? 0 : 1);
        }
    }

    package com.gxuwz.MaxValue;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    public class MaxValueMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split each line on spaces and emit every number under the same key,
            // so that all values end up in a single reduce group.
            String data = value.toString();
            String[] values = data.split(" ");
            for (String v : values) {
                context.write(new Text("maxValue"), new Text(v));
            }
        }
    }

    package com.gxuwz.MaxValue;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    public class MaxValueReducer extends Reducer<Text, Text, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Start from Integer.MIN_VALUE rather than 0 so the result is also
            // correct when every input number is negative.
            int maxValue = Integer.MIN_VALUE;
            for (Text v : values) {
                System.out.println("v:" + v.toString());
                int s = Integer.parseInt(v.toString());
                if (s > maxValue) {
                    maxValue = s;
                }
            }
            context.write(key, new IntWritable(maxValue));
        }
    }
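
    A combiner helps here as well, since every mapper funnels all of its numbers to the single key "maxValue" and one reduce call receives everything. MaxValueReducer itself cannot be reused as the combiner, because a combiner's output types must match the mapper's output types, (Text, Text), not the job's final (Text, IntWritable). A dedicated class is a short sketch (hypothetical, not in the original; it would be registered in the driver with job.setCombinerClass(MaxValueCombiner.class)):

    package com.gxuwz.MaxValue;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    // Hypothetical combiner: computes a per-map-task local maximum so that
    // only one value per map task crosses the network.
    public class MaxValueCombiner extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int localMax = Integer.MIN_VALUE;
            for (Text v : values) {
                localMax = Math.max(localMax, Integer.parseInt(v.toString()));
            }
            context.write(key, new Text(String.valueOf(localMax)));
        }
    }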
