zoukankan      html  css  js  c++  java
  • MapReduce计算每年最大值

    1. 测试数据文件的生成程序,请参考下面的链接:

    https://www.cnblogs.com/jonban/p/10555364.html

    MapReduce程序示例如下:

    2. 新建Maven项目  hadoop

    3. pom.xml

    <project xmlns="http://maven.apache.org/POM/4.0.0"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
            http://maven.apache.org/xsd/maven-4.0.0.xsd">
    
    
        <!-- Maven coordinates of this project: com.java:hadoop:1.0.0 -->
        <modelVersion>4.0.0</modelVersion>
        <groupId>com.java</groupId>
        <artifactId>hadoop</artifactId>
        <version>1.0.0</version>
    
    
        <!-- Apache Hadoop libraries; all three are pinned to the same version (3.2.0). -->
        <dependencies>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-common</artifactId>
                <version>3.2.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-hdfs</artifactId>
                <version>3.2.0</version>
            </dependency>
    
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-client</artifactId>
                <version>3.2.0</version>
            </dependency>
    
        </dependencies>
    
        <build>
            <!-- Name the built artifact after the artifactId only (no version suffix). -->
            <finalName>${project.artifactId}</finalName>
            <plugins>
                <!-- Compile Java 8 source to Java 8 bytecode, reading sources as UTF-8. -->
                <plugin>
                    <groupId>org.apache.maven.plugins</groupId>
                    <artifactId>maven-compiler-plugin</artifactId>
                    <version>3.8.0</version>
                    <configuration>
                        <source>1.8</source>
                        <target>1.8</target>
                        <encoding>UTF-8</encoding>
                    </configuration>
                </plugin>
            </plugins>
        </build>
    </project>

    4.   MaxMapper.java

    package com.java.mapreduce;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    /**
     * Maps each input line to a (year, value) pair so that records are grouped by year.
     *
     * <p>Every line is treated as a fixed-width record: characters [0, 4) become the year key and
     * characters [8, 12) are parsed as a decimal integer value. Lines that are shorter than 12
     * characters, or whose value field is not a valid integer, are skipped and counted under the
     * {@code MaxMapper / MALFORMED_RECORDS} job counter instead of failing the whole task.
     *
     * @author Logan
     * @createDate 2019-03-18
     * @version 1.0.0
     *
     */
    public class MaxMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        /** Offsets of the year field within a line (end exclusive). */
        private static final int YEAR_START = 0;
        private static final int YEAR_END = 4;

        /** Offsets of the numeric value field within a line (end exclusive). */
        private static final int VALUE_START = 8;
        private static final int VALUE_END = 12;

        /** Counter group and name used to report skipped lines. */
        private static final String COUNTER_GROUP = "MaxMapper";
        private static final String MALFORMED_COUNTER = "MALFORMED_RECORDS";

        // Output holders are reused across calls; context.write() serializes them immediately,
        // so there is no need to allocate two new objects for every input record.
        private final Text yearKey = new Text();
        private final IntWritable numValue = new IntWritable();

        /**
         * Emits (year, value) for one input line.
         *
         * @param key     input key supplied by the input format (not used)
         * @param value   one line of input text
         * @param context task context used to emit output and update counters
         * @throws IOException          if writing the output pair fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            String line = value.toString();

            // Guard against blank or truncated lines: substring() would otherwise throw
            // StringIndexOutOfBoundsException and fail the task.
            if (line.length() < VALUE_END) {
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                return;
            }

            int num;
            try {
                num = Integer.parseInt(line.substring(VALUE_START, VALUE_END));
            } catch (NumberFormatException ignored) {
                // A non-numeric value field makes the record unusable; skip it but keep it visible
                // through the counter.
                context.getCounter(COUNTER_GROUP, MALFORMED_COUNTER).increment(1);
                return;
            }

            yearKey.set(line.substring(YEAR_START, YEAR_END));
            numValue.set(num);
            context.write(yearKey, numValue);
        }

    }

    5.   MaxReducer.java

    package com.java.mapreduce;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    /**
     * Computes the maximum value among all values that share the same year key.
     *
     * @author Logan
     * @createDate 2019-03-18
     * @version 1.0.0
     *
     */
    public class MaxReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        /**
         * Scans every value received for {@code key} and writes a single (key, maximum) pair.
         *
         * @param key     the grouping key
         * @param values  all values emitted for that key
         * @param context task context used to emit the result
         * @throws IOException          if writing the result fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {

            // Start from the smallest int so that any real value replaces it.
            int largest = Integer.MIN_VALUE;

            for (IntWritable candidate : values) {
                final int current = candidate.get();
                if (current > largest) {
                    largest = current;
                }
            }

            final IntWritable result = new IntWritable(largest);
            context.write(key, result);
        }

    }

    6.   MaxJob.java

    package com.java.mapreduce;
    
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    /**
     * Program entry point: configures and runs the "Get Max" MapReduce job.
     *
     * <p>Usage: {@code MaxJob <input path> <output path>}. The process exits with status 2 when
     * the arguments are missing and with status 1 when the job fails or throws; on success
     * {@code main} returns normally.
     *
     * @author Logan
     * @createDate 2019-03-18
     * @version 1.0.0
     *
     */
    public class MaxJob {

        /**
         * Builds the job from the command-line arguments and waits for it to finish.
         *
         * @param args {@code args[0]} is the input path, {@code args[1]} is the output path
         */
        public static void main(String[] args) {
            // Fail fast with a readable message instead of an ArrayIndexOutOfBoundsException.
            if (args == null || args.length < 2) {
                System.err.println("Usage: MaxJob <input path> <output path>");
                System.exit(2);
                return;
            }

            int exitCode;
            try {
                Job job = Job.getInstance();
                job.setJarByClass(MaxJob.class);
                job.setJobName("Get Max");

                // The first argument is the input file path.
                FileInputFormat.addInputPath(job, new Path(args[0]));

                // The second argument is the path where the result files are written.
                FileOutputFormat.setOutputPath(job, new Path(args[1]));

                job.setMapperClass(MaxMapper.class);
                // Max is associative and commutative and the reducer's input and output types
                // match, so it can also run as a combiner to shrink the map output.
                job.setCombinerClass(MaxReducer.class);
                job.setReducerClass(MaxReducer.class);

                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);

                // waitForCompletion returns false when the job fails; surface that to the caller.
                exitCode = job.waitForCompletion(true) ? 0 : 1;

            } catch (Exception e) {
                if (e instanceof InterruptedException) {
                    // Preserve the interrupt status for any code that runs after this handler.
                    Thread.currentThread().interrupt();
                }
                e.printStackTrace();
                exitCode = 1;
            }

            // Only force an exit on failure so a successful run still returns normally.
            if (exitCode != 0) {
                System.exit(exitCode);
            }
        }

    }

    .

  • 相关阅读:
    SSM简单实现文件上传和下载
    Web发送邮件
    scala写算法-快排
    scala写算法-从后缀表达式构造
    scalajs_初体验
    scala写算法-用小根堆解决topK
    scala-Future和Promise
    python基础之函数
    python基础知识(五)
    python基础知识(四)
  • 原文地址:https://www.cnblogs.com/jonban/p/10555826.html
Copyright © 2011-2022 走看看