zoukankan      html  css  js  c++  java
  • 大数据学习(4)MapReduce编程Helloworld:WordCount

    Maven依赖:

    <dependency>
            <groupId>jdk.tools</groupId>
            <artifactId>jdk.tools</artifactId>
            <version>1.6</version>
            <scope>system</scope>
            <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.5</version>
        </dependency>
            
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.5</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.5</version>
        </dependency>

    Mapper类:

    public class WordcountMapper extends Mapper<LongWritable,Text,Text,IntWritable>{

        // Reused output objects: Hadoop copies/serializes them inside
        // context.write(), so one pair avoids two allocations per token.
        private final Text outKey = new Text();
        private final IntWritable one = new IntWritable(1);

        /**
         * Tokenizes one input line and emits (word, 1) per token.
         *
         * @param key     byte offset of the line in the input split (unused)
         * @param value   the line of text
         * @param context sink for the (word, 1) pairs
         */
        @Override
        protected void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException {

            String line = value.toString();

            // \\s+ handles tabs and runs of spaces; the original split(" ")
            // produced empty-string "words" for consecutive spaces.
            for(String word : line.split("\\s+")) {
                if (word.isEmpty()) {
                    continue; // leading whitespace yields one empty token
                }
                outKey.set(word);
                context.write(outKey, one);
            }

        }
    }

    Reducer类:

    public class WordcountReducer extends Reducer<Text, IntWritable,Text, IntWritable> {

        // Reused output value: set() per key instead of allocating a new
        // IntWritable for every distinct word (idiomatic Hadoop).
        private final IntWritable result = new IntWritable();

        /**
         * Sums the per-mapper counts for one word and emits (word, total).
         *
         * @param key     the word
         * @param values  the partial counts emitted by the mappers (and, if
         *                configured, a combiner)
         * @param context sink for the final (word, total) pair
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
            int count = 0;
            for(IntWritable value : values) {
                count += value.get();
            }
            result.set(count);
            context.write(key , result);
        }
    }

    启动类:

    public class WordcountLancher {

        /**
         * Configures and submits the WordCount job.
         *
         * @param args args[0] = HDFS input path, args[1] = HDFS output path
         *             (the output path must not already exist)
         */
        public static void main(String[] args) throws Exception{
            if (args.length < 2) {
                System.err.println("Usage: WordcountLancher <input path> <output path>");
                System.exit(2);
            }
            String inputPath = args[0];
            String outputPath = args[1];

            Job job = Job.getInstance();

            // Without this, the cluster cannot locate the jar that contains
            // the mapper/reducer classes and the job fails with
            // ClassNotFoundException when run via `hadoop jar`.
            job.setJarByClass(WordcountLancher.class);

            job.setMapperClass(WordcountMapper.class);
            // Word counting is associative and commutative, so the reducer
            // doubles as a combiner, shrinking shuffle traffic.
            job.setCombinerClass(WordcountReducer.class);
            job.setReducerClass(WordcountReducer.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);

            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);

            FileInputFormat.setInputPaths(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));

            // true => print progress/counters to the console while running.
            boolean success = job.waitForCompletion(true);

            System.exit(success ? 0 : 1);

        }

    }

    在HDFS中准备输入数据:

    hadoop fs -mkdir -p /wordcount/input
    
    hadoop fs -put LICENSE.txt /wordcount/input

    记得启动yarn:

    start-yarn.sh

    启动map-reduce程序:

     hadoop jar wordcount.jar me.huqiao.hadoop.mr.WordcountLancher /wordcount/input /wordcount/output

    查看结果:

    hadoop fs -cat /wordcount/output/part-r-00000 |more
  • 相关阅读:
    openlayers方法总结
    AJAX 数据库实例
    AJAX 请求服务器
    得到XMLHttpRequest对象
    AJAX 简介
    AJAX 服务器端的脚本
    HTTP GET 最多发送100个字符
    AJAX XMLHttpRequest 对象
    Dictionary、ArrayList、Hashtable和数组 Array 的区别
    AJAX 请求实例
  • 原文地址:https://www.cnblogs.com/at0x7c00/p/8054043.html
Copyright © 2011-2022 走看看