zoukankan      html  css  js  c++  java
  • hadoop之 mapreduce example(1)

    环境:
    jdk: 1.8
    hadoop: 2.6.4

    MapReduce 数据流

    mapper:

    package com.hadoop.mapreduce;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    
    /**
     * Created by madong on 2016/05/22.
     */
    public class MaxTemperatureMapper extends Mapper<LongWritable,Text,Text,IntWritable>{
        private static final int MISSING = 9999;
    
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    
            System.out.println("mapper input:"+ key + "  "+ value);
            String line = value.toString();
            String year = line.substring(15,19);
            int airTemperature;
            if (line.charAt(87) == '+'){
                airTemperature = Integer.parseInt(line.substring(88,92));
            }else {
                airTemperature = Integer.parseInt(line.substring(87,92));
            }
    
            String quality = line.substring(92,93);
    
            if (airTemperature != MISSING && quality.matches("[01459]")) {
                context.write(new Text(year), new IntWritable(airTemperature));
            }
    
        }
    }
    
    

    reducer:

    package com.hadoop.mapreduce;
    
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    
    /**
     * Created by madong on 2016/05/22.
     */
    public class MaxTemperatureReducer extends Reducer<Text,IntWritable,Text,IntWritable> {
    
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
    
            int maxValue = Integer.MIN_VALUE;
    
            for (IntWritable value :values){
                maxValue = Math.max(maxValue,value.get());
            }
            context.write(key,new IntWritable(maxValue));
        }
    }
    
    

    job:

    package com.hadoop.mapreduce;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    /**
     * Created by madong on 2016/05/22.
     */
    public class MaxTemperatureJob {
    
        private final static String INPUT_PATH = "hdfs://lenovo:9000/hadoop/temp";
        private final static String OUT_PUTH = "hdfs://lenovo:9000/hadoop/out";
    
        public static void main(String[] args) {
    
    
            try {
                //远程连接 配置 如果不是windows远程连接可以不用配置Configuration
                Configuration configuration = new Configuration();
                configuration.set("fs.defaultFS", "hdfs://lenovo:9000");
                configuration.set("mapreduce.framework.name", "yarn");
                configuration.set("yarn.resourcemanager.address", "lenovo:8032");
                configuration.set("yarn.resourcemanager.scheduler.address", "lenovo:8030");
                configuration.set("mapred.jar", "D:\tool\IdeaProjects\hadoop\out\artifacts\hadoop_jar\hadoop.jar");
    
                Job job = Job.getInstance(configuration);
    
                job.setJarByClass(MaxTemperatureJob.class);
                job.setJobName("Max temperature");
    
                FileInputFormat.addInputPath(job,new Path(INPUT_PATH));
                FileOutputFormat.setOutputPath(job,new Path(OUT_PUTH));
    
                job.setMapperClass(MaxTemperatureMapper.class);
                job.setReducerClass(MaxTemperatureReducer.class);
    
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);
    
                System.exit(job.waitForCompletion(true) ? 0:1);
    
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
    
    
    用放荡不羁的心态过随遇而安的生活
  • 相关阅读:
    sqlchemy self made
    scrapy 自定义图片路径保存,并存到数据库中
    关于scrapy下载文件重命名的办法以及对应url没有文件后缀的办法
    下载转码
    scrapy 下载图片 from cuiqingcai
    Scrapy框架学习
    字符串处理
    scrapy 日志处理
    sqlalchemy多对多查询
    sqlalchemy 多对多关系
  • 原文地址:https://www.cnblogs.com/re-myself/p/5518283.html
Copyright © 2011-2022 走看看