zoukankan      html  css  js  c++  java
  • 大数据学习(4)MapReduce编程Helloworld:WordCount

    Maven依赖:

    <!-- JDK tools.jar, resolved from the local JDK install via a system-scoped path
         rather than from a Maven repository.
         NOTE(review): presumably needed by a transitive Hadoop dependency; confirm. -->
    <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.6</version>
            <scope>system</scope>
            <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>
        <!-- Hadoop HDFS artifact, version 2.6.5. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.5</version>
        </dependency>
            
        <!-- Hadoop common artifact; kept on the same version (2.6.5) as the other Hadoop artifacts. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.5</version>
        </dependency>
        <!-- Hadoop MapReduce client core artifact, version 2.6.5. -->
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.5</version>
        </dependency>

    Mapper类:

    /**
     * Map phase of the word count job.
     *
     * <p>Each input record is one line of text. The line is broken into words on
     * runs of whitespace and a {@code (word, 1)} pair is emitted for every word.
     */
    public class WordcountMapper extends Mapper<LongWritable,Text,Text,IntWritable>{

        /** The constant count written for every word occurrence. */
        private static final IntWritable ONE = new IntWritable(1);

        /**
         * Output key, reused across calls so that no new object is allocated per word.
         * The value is overwritten with {@link Text#set(String)} before each write.
         */
        private final Text word = new Text();

        /**
         * Emits {@code (word, 1)} for every whitespace-separated word in {@code value}.
         *
         * @param key     input key of the record (not used)
         * @param value   the line of text to tokenize
         * @param context sink that receives the emitted pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {

            String line = value.toString();

            // Split on any run of whitespace (spaces, tabs, ...) rather than a single
            // space, so repeated separators do not produce empty "words".
            for (String token : line.split("\\s+")) {
                // A blank line or leading whitespace still yields one empty token; skip it
                // so the empty string is never counted as a word.
                if (token.isEmpty()) {
                    continue;
                }
                word.set(token);
                context.write(word, ONE);
            }

        }
    }

    Reducer类:

    /**
     * Reduce phase of the word count job: adds up all counts received for one word
     * and writes a single {@code (word, total)} pair.
     */
    public class WordcountReducer extends Reducer<Text, IntWritable,Text, IntWritable> {

        /**
         * Sums the values grouped under {@code key} and emits the total.
         *
         * @param key     the word being counted
         * @param values  the individual counts emitted for that word
         * @param context sink that receives the {@code (word, total)} pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
            int total = 0;
            for (IntWritable partial : values) {
                total = total + partial.get();
            }

            IntWritable result = new IntWritable(total);
            context.write(key, result);
        }
    }

    启动类:

    /**
     * Entry point that configures and submits the word count MapReduce job.
     *
     * <p>Usage: {@code WordcountLancher <input path> <output path>}
     */
    public class WordcountLancher {

        /**
         * Builds the job from {@link WordcountMapper} and {@link WordcountReducer},
         * submits it, waits for it to finish, and exits with status 0 on success,
         * 1 on job failure, or 2 when the arguments are missing.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} the output path
         * @throws Exception if the job cannot be configured or submitted
         */
        public static void main(String[] args) throws Exception{
            // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
            if (args.length < 2) {
                System.err.println("Usage: WordcountLancher <input path> <output path>");
                System.exit(2);
            }

            String inputPath = args[0];
            String outputPath = args[1];

            Job job = Job.getInstance();

            // Tell Hadoop which jar contains the job classes so it is shipped to the
            // cluster; without this the tasks may not find the mapper/reducer classes.
            job.setJarByClass(WordcountLancher.class);

            job.setMapperClass(WordcountMapper.class);
            job.setReducerClass(WordcountReducer.class);

            // Types of the intermediate (map output) key/value pairs.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            // Types of the final (reduce output) key/value pairs.
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);


            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));

            // Passing true prints progress to the console while waiting.
            boolean success = job.waitForCompletion(true);

            System.exit(success ? 0 : 1);

        }

    }

    在HDFS中准备输入数据:

    hadoop fs -mkdir -p /wordcount/input
    
    hadoop fs -put LICENSE.txt /wordcount/input

    记得启动yarn:

    start-yarn.sh

    启动map-reduce程序(注意:输出目录 /wordcount/output 不能事先存在,否则作业会报错退出;上面的三个类需要位于 me.huqiao.hadoop.mr 包中,才能与下面命令中的类名对应):

     hadoop jar wordcount.jar me.huqiao.hadoop.mr.WordcountLancher /wordcount/input /wordcount/output

    查看结果:

    hadoop fs -cat /wordcount/output/part-r-00000 |more
  • 相关阅读:
    iOS之POST与GET的优缺点
    iOS之设置头像(访问系统相册、本地上传)
    iOS之清除缓存
    iOS之自动调节输入文本框的高度
    iOS之隐藏键盘的方式
    iOS之关于 srand() 和rand()
    Android Studio移除模块
    Android 弹出输入框
    webApi添加视图出现/Index.cshtml”处的视图必须派生自 WebViewPage 或 WebViewPage<TModel>。
    JSON Web Tokens简单学习
  • 原文地址:https://www.cnblogs.com/at0x7c00/p/8054043.html
Copyright © 2011-2022 走看看