zoukankan      html  css  js  c++  java
  • hadoop mapreduce

    写在前面:

    需要保证hadoop版本  各个jar版本一致,否则可能出现各种哦莫名奇妙的错误!

    maven 依赖:

    <?xml version="1.0" encoding="UTF-8"?>
    <project xmlns="http://maven.apache.org/POM/4.0.0"
             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
             xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
        <modelVersion>4.0.0</modelVersion>
        <packaging>jar</packaging>
        <groupId>BaseTecLearn</groupId>
        <artifactId>BaseTecLearn</artifactId>
        <version>1.0-SNAPSHOT</version>
    
        <dependencies>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-core_2.11</artifactId>
                <version>2.2.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.spark</groupId>
                <artifactId>spark-sql_2.11</artifactId>
                <version>2.2.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.thrift</groupId>
                <artifactId>libthrift</artifactId>
                <version>0.6.1</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-common</artifactId>
                <version>2.7.1</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-mapreduce-client-core</artifactId>
                <version>2.7.4</version>
            </dependency>
        </dependencies>
    </project>
    View Code

    resource目录下配置日志(很重要,可以查看警告啥的)

    log4j.rootLogger=WARN,stdout,logfile  
    log4j.appender.stdout=org.apache.log4j.ConsoleAppender  
    log4j.appender.stdout.layout=org.apache.log4j.PatternLayout  
    log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n  
    
    log4j.appender.logfile=org.apache.log4j.FileAppender  
    log4j.appender.logfile.File=hadoop.log   
    log4j.appender.logfile.layout=org.apache.log4j.PatternLayout  
    log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%ns  
    package top.letsgogo;
    
    import java.io.IOException;
    import java.util.StringTokenizer;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class WordCount {
    
      public static class TokenizerMapper
           extends Mapper<Object, Text, Text, IntWritable>{
    
        private final static IntWritable one = new IntWritable(1);
        private Text word = new Text();
    
        public void map(Object key, Text value, Context context
                        ) throws IOException, InterruptedException {
          StringTokenizer itr = new StringTokenizer(value.toString());
          while (itr.hasMoreTokens()) {
            word.set(itr.nextToken());
            context.write(word, one);
          }
        }
      }
    
      public static class IntSumReducer
           extends Reducer<Text,IntWritable,Text,IntWritable> {
        private IntWritable result = new IntWritable();
    
        public void reduce(Text key, Iterable<IntWritable> values,
                           Context context
                           ) throws IOException, InterruptedException {
          int sum = 0;
          for (IntWritable val : values) {
            sum += val.get();
          }
          result.set(sum);
          context.write(key, result);
        }
      }
    
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "word count");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);
        job.setCombinerClass(IntSumReducer.class);
        job.setReducerClass(IntSumReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path("/home/panteng/IdeaProjects/pushscore-sdk/baseTecLearn/target/classes/regular.txt"));
        FileOutputFormat.setOutputPath(job, new Path("/home/panteng/IdeaProjects/pushscore-sdk/baseTecLearn/target/classes/regular"));
        System.out.println(job.waitForCompletion(true));
        //System.exit(job.waitForCompletion(true) ? 0 : 1);
      }
    }
  • 相关阅读:
    elementUI 表格分页后台排序记录
    oracle乱码记录
    JavaScript 数字转汉字+element时间选择器快速选择
    js中call()方法和apply方法的使用
    Rails导出CSV
    CakePHP2.x 发送邮件
    一个例子说明substr(), mb_substr() 和 mb_strcut()之间的区别
    substr是不安全的
    CakePHP中回调函数的使用
    cakephp中find('list')的使用
  • 原文地址:https://www.cnblogs.com/tengpan-cn/p/7553495.html
Copyright © 2011-2022 走看看