zoukankan      html  css  js  c++  java
  • 跟踪wordcount计数器的运行信息

    1、mapper类

    package com.cr.wordcount;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    public class WorcountMapper extends Mapper<LongWritable,Text,Text,IntWritable>{
        //无参构造
        public WorcountMapper(){
            System.out.println("-->new WorcountMapper");
        }
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    
            Text keyOut = new Text();
            IntWritable valueout = new IntWritable();
            String[] arr = value.toString().split(" ");
            for(String s : arr){
                keyOut.set(s);
                valueout.set(1);
                context.write(keyOut,valueout);
                //每调用一次该mapper文件,就记录一次
                context.getCounter("m组",Utils.getInfo(this,"map类")).increment(1);
            }
        }
        }
    
    

    2、reducer类

    package com.cr.wordcount;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    public class WordcountReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
        //无参构造
        public WordcountReducer(){
            System.out.println("-->new WordcountReducer");
        }
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            int count = 0;
            for(IntWritable iw : values){
                count += iw.get();
            }
            //获取当前线程
            String tno = Thread.currentThread().getName();
            System.out.println("线程==>"+ tno + "===>  reducer ===>  " + key.toString() + "===>" + count);
            //进入一次reducer就加1次
    //        context.getCounter("==reduce===","===WordcountReducer.reduce===").increment(1);
            context.getCounter("r组",Utils.getInfo(this,"reducer类")).increment(1);
            context.write(key,new IntWritable(count));
    
        }
    }
    

    3、计数器主函数

    package com.cr.wordcount;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.examples.WordMean;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    public class WordcountApp {
        public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
            //单例作业
            Configuration conf = new Configuration();
    //        conf.set("fs.defaultFS","file:///");
            Job job = Job.getInstance(conf);
    
            //设置job的各种属性
            job.setJobName("wordcountAPP");                 //设置job名称
            job.setJarByClass(WordcountApp.class);              //设置搜索类
            job.setInputFormatClass(TextInputFormat.class);
    
            //设置输入路径
            FileInputFormat.addInputPath(job,new Path((args[0])));
            //设置输出路径
            FileOutputFormat.setOutputPath(job,new Path(args[1]));
    
    //        //设置分区
    //        job.setPartitionerClass(MyPartitioner.class);
    //        //设置合成类s
            job.setCombinerClass(WordcountReducer.class);
    
    
            job.setMapperClass(WorcountMapper.class);               //设置mapper类
            job.setReducerClass(WordcountReducer.class);               //设置reduecer类
            job.setNumReduceTasks(3);                         //设置reduce个数
    
            job.setMapOutputKeyClass(Text.class);            //设置之map输出key
            job.setMapOutputValueClass(IntWritable.class);   //设置map输出value
            job.setOutputKeyClass(Text.class);               //设置mapreduce 输出key
            job.setOutputValueClass(IntWritable.class);      //设置mapreduce输出value
            job.waitForCompletion(true);
        }
    }
    

    4、工具类获取进程

    package com.cr.wordcount;
    
    import java.lang.management.ManagementFactory;
    import java.net.InetAddress;
    import java.net.UnknownHostException;
    
    public class Utils {
    
        public static String getInfo(Object o,String msg){
            return "主机名:" + getHostname() + "=>" + "进程id" + getPID() + "=>" + "线程ID" +  getTID() + "=>" + "类对象信息" + getObjInfo(o) + "=>" + "运行类" + msg;
        }
    
        /**
         * 获取主机名
         * @return
         */
        public static String getHostname(){
            try {
                return InetAddress.getLocalHost().getHostName();
            } catch (UnknownHostException e) {
                e.printStackTrace();
            }
            return null;
        }
    
        /**
         * 获取当前程序所在进程id
         * @return
         */
        public static int getPID(){
            String info = ManagementFactory.getRuntimeMXBean().getName();
            return Integer.parseInt(info.substring(0,info.indexOf("@")));
        }
    
        /**
         * 获取当前程序所在进程id
         * @return
         */
        public static String getTID(){
            return Thread.currentThread().getName();
        }
    
        /**
         * 获取对象的简单类名和哈希值
         * @param o
         * @return
         */
        public static String getObjInfo(Object o){
            String sname = o.getClass().getSimpleName();
            return sname + "@" + o.hashCode();
    
        }
    }
    


    5、添加core-site.xml和yarn-site.xml

    6、打包,运行在Hadoop集群上

    待计数的文件有三个
    drwxr-xr-x   - xiaoqiu supergroup          0 2018-01-05 09:10 /user
    drwxr-xr-x   - xiaoqiu supergroup          0 2018-01-05 10:44 /user/xiaoqiu
    drwxr-xr-x   - xiaoqiu supergroup          0 2018-01-05 12:06 /user/xiaoqiu/data
    -rw-r--r--   2 xiaoqiu supergroup         12 2018-01-05 10:45 /user/xiaoqiu/data/wc.txt
    -rw-r--r--   2 xiaoqiu supergroup         18 2018-01-05 12:06 /user/xiaoqiu/data/wc1.txt
    -rw-r--r--   2 xiaoqiu supergroup         16 2018-01-05 12:06 /user/xiaoqiu/data/wc2.txt
    

    运行jar包

    [xiaoqiu@s150 /home/xiaoqiu]$ hadoop jar wordcounter.jar com.cr.wordcount.WordcountApp hdfs://s150/user/xiaoqiu/data hdfs://s150/user/xiaoqiu/data/out
    

    7、运行结果

            File System Counters
                    FILE: Number of bytes read=124
                    FILE: Number of bytes written=732401
                    FILE: Number of read operations=0
                    FILE: Number of large read operations=0
                    FILE: Number of write operations=0
                    HDFS: Number of bytes read=351
                    HDFS: Number of bytes written=44
                    HDFS: Number of read operations=18
                    HDFS: Number of large read operations=0
                    HDFS: Number of write operations=6
            Job Counters
                    Killed map tasks=2
                    Launched map tasks=4
                    Launched reduce tasks=3
                    Data-local map tasks=4
                    Total time spent by all maps in occupied slots (ms)=79906
                    Total time spent by all reduces in occupied slots (ms)=74886
                    Total time spent by all map tasks (ms)=79906
                    Total time spent by all reduce tasks (ms)=74886
                    Total vcore-milliseconds taken by all map tasks=79906
                    Total vcore-milliseconds taken by all reduce tasks=74886
                    Total megabyte-milliseconds taken by all map tasks=81823744
                    Total megabyte-milliseconds taken by all reduce tasks=76683264
            Map-Reduce Framework
                    Map input records=3
                    Map output records=10
                    Map output bytes=86
                    Map output materialized bytes=160
                    Input split bytes=305
                    Combine input records=10
                    Combine output records=10
                    Reduce input groups=7
                    Reduce shuffle bytes=160
                    Reduce input records=10
                    Reduce output records=7
                    Spilled Records=20
                    Shuffled Maps =9
                    Failed Shuffles=0
                    Merged Map outputs=9
                    GC time elapsed (ms)=646
                    CPU time spent (ms)=5080
                    Physical memory (bytes) snapshot=899522560
                    Virtual memory (bytes) snapshot=12507885568
                    Total committed heap usage (bytes)=416440320
            Shuffle Errors
                    BAD_ID=0
                    CONNECTION=0
                    IO_ERROR=0
                    WRONG_LENGTH=0
                    WRONG_MAP=0
                    WRONG_REDUCE=0
            m组
                    主机名:s150=>进程id6421=>线程IDmain=>类对象信息WorcountMapper@1910896157=>运=4
                    主机名:s151=>进程id3857=>线程IDmain=>类对象信息WorcountMapper@479734028=>运行=2
                    主机名:s152=>进程id3080=>线程IDmain=>类对象信息WorcountMapper@479734028=>运行=4
            File Input Format Counters
                    Bytes Read=46
            File Output Format Counters
                    Bytes Written=44
            r组
                    主机名:s150=>进程id6421=>线程IDmain=>类对象信息WordcountReducer@612693043=>=2
                    主机名:s150=>进程id6421=>线程IDmain=>类对象信息WordcountReducer@878991463=>=2
                    主机名:s151=>进程id3816=>线程IDmain=>类对象信息WordcountReducer@275056979=>=3
                    主机名:s151=>进程id3857=>线程IDmain=>类对象信息WordcountReducer@905847077=>=1
                    主机名:s151=>进程id3857=>线程IDmain=>类对象信息WordcountReducer@908384914=>=1
                    主机名:s152=>进程id3080=>线程IDmain=>类对象信息WordcountReducer@156199931=>=1
                    主机名:s152=>进程id3080=>线程IDmain=>类对象信息WordcountReducer@905847077=>=1
                    主机名:s152=>进程id3080=>线程IDmain=>类对象信息WordcountReducer@908384914=>=2
                    主机名:s152=>进程id3128=>线程IDmain=>类对象信息WordcountReducer@275056979=>=2
                    主机名:s152=>进程id3168=>线程IDmain=>类对象信息WordcountReducer@275056979=>=2
    


    欢迎关注我的公众号:小秋的博客 CSDN博客:https://blog.csdn.net/xiaoqiu_cr github:https://github.com/crr121 联系邮箱:rongchen633@gmail.com 有什么问题可以给我留言噢~
  • 相关阅读:
    不要忘了你曾经会写sql的
    一个定期压缩、删除过期数据的emp脚本
    xcode中如何安装多个版本的模拟器
    Xcode4.4中,代码无法高亮、无法自动补全
    xml有哪些解析技术?区别是什么?
    XML有哪些解析方式有何优缺点?xml有哪些解析技术?区别是什么?
    ASIHttpRequest网络请求第三方类库使用方法详解
    iPhone中如何判断当前相机是否可用
    NSArray 跟 NSMutableArray 使用 区别
    通常我们使用[NSDate date]方法得到的时间与当前时间不一致,如何解决?
  • 原文地址:https://www.cnblogs.com/flyingcr/p/10326963.html
Copyright © 2011-2022 走看看