  • Big-data WordCount code. To really understand the code, study it alongside a diagram of the MapReduce data flow.
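
    A tiny worked example (the input line is an illustrative assumption, not taken from the post) of the data flow that the code below implements:

        input line:     "hello world hello"
        map output:     (hello,1) (world,1) (hello,1)
        shuffle/group:  hello -> [1, 1]   world -> [1]
        reduce output:  (hello,2) (world,1)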

    package cn.itcast.hadoop.mr;

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class WordCountDriver {

        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            //conf.set("mapreduce.app-submission.cross-platform", "true");  // cross-platform flag so the MR job can be submitted from Windows
            Job job = Job.getInstance(conf, "word count");
            job.setJarByClass(WordCountDriver.class);
            job.setMapperClass(WordCountMapper.class);
            job.setReducerClass(WordCountReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            // Input and output directories on the cluster's default filesystem; the output directory must not already exist.
            FileInputFormat.addInputPath(job, new Path("/home/node-1/zhouriyue/input/"));
            FileOutputFormat.setOutputPath(job, new Path("/home/node-1/zhouriyue/output/"));
            /*FileInputFormat.setInputPaths(job, "/wordcount/input");
            FileOutputFormat.setOutputPath(job, new Path("/wordcount/output"));*/
            boolean b = job.waitForCompletion(true);
            System.exit(b ? 0 : 1);
        }
    }
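
    The input and output paths above are hardcoded. A common variant (a minimal sketch, not part of the original post; the jar name in the usage comment is an assumption) reads the two paths from the command line so the same jar can be run against any directory:

        // Hypothetical alternative main() for WordCountDriver: paths come from args instead of being hardcoded.
        // Assumed usage: hadoop jar wordcount.jar cn.itcast.hadoop.mr.WordCountDriver /wordcount/input /wordcount/output
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf, "word count");
            job.setJarByClass(WordCountDriver.class);
            job.setMapperClass(WordCountMapper.class);
            job.setReducerClass(WordCountReducer.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));    // first argument: input directory
            FileOutputFormat.setOutputPath(job, new Path(args[1]));  // second argument: output directory (must not exist yet)
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }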

    package cn.itcast.hadoop.mr;

    import java.io.IOException;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Called once per input line; the key is the byte offset of the line in the file.
            String line = value.toString();
            String[] words = line.split(" ");
            // Emit (word, 1) for every token so the framework can group identical words together.
            for (String word : words) {
                System.out.println(word + "," + 1);
                context.write(new Text(word), new IntWritable(1));
            }
        }
    }

    package cn.itcast.hadoop.mr;

    import java.io.IOException;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // The framework has already grouped the map output by key, so all the 1s for one word arrive here together.
            int count = 0;
            for (IntWritable value : values) {
                count += value.get();
            }
            System.out.println(key + "," + count);
            context.write(key, new IntWritable(count));
        }
    }
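
    Because the word counts are summed (an associative and commutative operation) and this reducer's input and output types are identical, the same class can also be registered as a combiner so partial sums are computed on each map task before the shuffle. This is an optional addition that is not in the original code; the only change needed is one extra line in WordCountDriver.main():

            // Optional: reuse the reducer as a combiner to cut down the data sent across the network.
            job.setCombinerClass(WordCountReducer.class);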

    Learn to extend by analogy: the real win is borrowing from other people's code and then writing your own.

    Problem: find the maximum of all the numbers in the files 4.txt, 5.txt, and 6.txt. The code is as follows.

    package com.gxuwz.MaxValue;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    import java.io.IOException;

    public class MaxValueDriver {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            Configuration conf = new Configuration();
            Job job = Job.getInstance(conf);
            job.setJarByClass(MaxValueDriver.class);
            job.setMapperClass(MaxValueMapper.class);
            job.setReducerClass(MaxValueReducer.class);
            // The map output types (Text, Text) differ from the final output types (Text, IntWritable),
            // so both pairs have to be declared explicitly.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.setInputPaths(job,"/home/node-1/zhouriyue/data/");
            FileOutputFormat.setOutputPath(job,new Path("/home/node-1/zhouriyue/maxvalue/"));
            boolean b = job.waitForCompletion(true);
            System.exit(b ? 0 : 1);
        }
    }

    package com.gxuwz.MaxValue;

    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    import java.io.IOException;

    public class MaxValueMapper extends Mapper<LongWritable, Text,Text, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Emit every number under the single key "maxValue" so that all values meet in one reduce call.
            String data = value.toString();
            String[] values = data.split(" ");
            for (int i = 0; i < values.length; i++) {
                context.write(new Text("maxValue"), new Text(values[i]));
            }
        }
    }

    package com.gxuwz.MaxValue;

    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    public class MaxValueReducer extends Reducer<Text, Text,Text,IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Start from Integer.MIN_VALUE so the result is correct even if every number is negative.
            int maxValue = Integer.MIN_VALUE;
            for (Text v : values) {
                System.out.println("v:" + v.toString());
                int s = Integer.parseInt(v.toString());
                if (s > maxValue) {
                    maxValue = s;
                }
            }
            context.write(new Text("maxValue"), new IntWritable(maxValue));
        }
    }
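
    A combiner can help the max-value job in the same way, but MaxValueReducer cannot be reused for it directly: a combiner's output types must match the map output types (Text, Text), while this reducer emits (Text, IntWritable). The sketch below is a hypothetical MaxValueCombiner (not part of the original post) that forwards only each map task's local maximum; it would be registered in MaxValueDriver with job.setCombinerClass(MaxValueCombiner.class):

    package com.gxuwz.MaxValue;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;

    import java.io.IOException;

    // Hypothetical combiner: runs on the map side and emits only the local maximum per map task,
    // so far fewer values are shuffled to the single reduce call.
    public class MaxValueCombiner extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            int localMax = Integer.MIN_VALUE;
            for (Text v : values) {
                int s = Integer.parseInt(v.toString());
                if (s > localMax) {
                    localMax = s;
                }
            }
            context.write(key, new Text(String.valueOf(localMax)));
        }
    }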

  • Original post: https://www.cnblogs.com/riyueqian/p/12254124.html