  • WordCount development on Windows

    The previous articles covered setting up the Hadoop environment on Windows; that environment is based on Hadoop 2.6.0 (if anything is unclear you can contact me on QQ at 1565189664). This article focuses on how to run a MapReduce job locally on Windows.
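    One note about running Hadoop code directly on Windows: the client needs to locate winutils.exe, otherwise the job fails early with a "Could not locate executable ...\bin\winutils.exe" error. If the HADOOP_HOME environment variable is not picked up by your IDE, a common workaround is to set hadoop.home.dir at the very start of main(), before the Configuration is created. The path below is only an illustrative assumption; adjust it to wherever your Hadoop distribution is unpacked:

    // Sketch only: point Hadoop at a local installation that contains bin\winutils.exe
    System.setProperty("hadoop.home.dir", "D:\\hadoop-2.6.0");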

    Maven is used as the project-management tool, with JDK 1.8.

    The project structure is as follows:
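    (A typical Maven layout for this project, assumed from the package name cn.itcast.hadoop.mr and the files listed below; your structure may differ slightly:)

    Hadoop/
        pom.xml
        src/main/java/cn/itcast/hadoop/mr/
            WCount.java
            WCMapper.java
            WCReducer.java
        src/main/resources/
            log4j.properties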

    The code of WCount.java is as follows:

    package cn.itcast.hadoop.mr;
    
    import java.util.Arrays;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class WCount {
    
        public static void main(String[] args) {
    
            // print the command-line arguments (Arrays.toString shows the contents, not the array reference)
            System.out.println(Arrays.toString(args));
    
            Configuration conf = new Configuration();
            conf.setInt("mapreduce.client.submit.file.replication", 20);
            /*conf.set("mapreduce.framework.name", "yarn");
            conf.set("mapred.job.tracker", "namenode:9001");
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            conf.set("mapreduce.framework.name", "yarn");
            conf.addResource("core-site.xml");
            conf.addResource("hdfs-site.xml");
            conf.addResource("mapred-site.xml");
            conf.addResource("yarn-site.xml");*/
            //conf.set("mapred.jar", "D:\\workspace\\Hadoop\\target\\Hadoop.jar");
    
            try {
                Job job = Job.getInstance(conf);
                job.setJarByClass(WCount.class);
    
                // set the mapper's properties
                job.setMapperClass(WCMapper.class);
                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(LongWritable.class);
                FileInputFormat.setInputPaths(job, new Path("hdfs://192.168.1.2:9000/lzh/word.txt"));
    
                // set the reducer's properties
                job.setReducerClass(WCReducer.class);
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(LongWritable.class);
                FileOutputFormat.setOutputPath(job, new Path("hdfs://192.168.1.2:9000/lzh/wcut"));
    
                // submit the job and wait for it to finish
                System.exit(job.waitForCompletion(true) ? 0 : 1);
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
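    Note that FileOutputFormat refuses to write into a directory that already exists: if /lzh/wcut is left over from a previous run, the job fails with a FileAlreadyExistsException. A common convenience, shown here only as a sketch (it is not part of the original code), is to delete the output path before submitting:

    // Sketch: remove the output directory if a previous run left it behind
    // (requires the extra import org.apache.hadoop.fs.FileSystem)
    Path output = new Path("hdfs://192.168.1.2:9000/lzh/wcut");
    FileSystem fs = FileSystem.get(output.toUri(), conf);
    if (fs.exists(output)) {
        fs.delete(output, true); // true = delete recursively
    }
    FileOutputFormat.setOutputPath(job, output);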

    The code of WCMapper.java is as follows:

    package cn.itcast.hadoop.mr;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // read one line of input
            String line = value.toString();
            // split the line into words on single spaces
            String[] words = line.split(" ");
            // emit (word, 1) for every word
            for (String w : words) {
                context.write(new Text(w), new LongWritable(1));
            }
        }
    }
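    Since map() allocates a new Text and a new LongWritable for every word, a small, optional optimization (not part of the original code, just a sketch of a common MapReduce idiom; the class name WCMapperReuse is hypothetical) is to reuse the Writable instances across calls:

    package cn.itcast.hadoop.mr;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    // Sketch of an alternative mapper that reuses Writable objects instead of
    // allocating new ones per word
    public class WCMapperReuse extends Mapper<LongWritable, Text, Text, LongWritable> {
    
        private final Text outKey = new Text();
        private static final LongWritable ONE = new LongWritable(1);
    
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            for (String w : value.toString().split(" ")) {
                outKey.set(w);
                // the framework serializes key/value immediately, so reuse across calls is safe
                context.write(outKey, ONE);
            }
        }
    }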

    The code of WCReducer.java is as follows:

    package cn.itcast.hadoop.mr;
    
    import java.io.IOException;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class WCReducer extends Reducer<Text, LongWritable, Text, LongWritable> {
    
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context)
                throws IOException, InterruptedException {
            // sum up all the 1s emitted for this word
            long counter = 0;
            for (LongWritable l : values) {
                counter += l.get();
            }
            // write (word, total count)
            context.write(key, new LongWritable(counter));
        }
    }
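    Because this reduce function simply sums its values, it is associative and commutative, so the same class can also act as a combiner to pre-aggregate map output before the shuffle. If you want to try that (it is not in the original code), add one line to the driver after the mapper is configured:

    // Optional: reuse the reducer as a combiner to shrink the map output
    job.setCombinerClass(WCReducer.class);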

    pom.xml

    <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
      <modelVersion>4.0.0</modelVersion>
      <groupId>HadoopJar</groupId>
      <artifactId>Hadoop</artifactId>
      <version>0.0.1-SNAPSHOT</version>
      <name>Hadoop</name>
      <dependencies>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-common -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-common</artifactId>
          <version>2.7.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-mapreduce-client-core</artifactId>
          <version>2.7.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-hdfs</artifactId>
          <version>2.7.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-common -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-mapreduce-client-common</artifactId>
          <version>2.7.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-jobclient -->
        <dependency>
          <groupId>org.apache.hadoop</groupId>
          <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
          <version>2.7.2</version>
        </dependency>
        <dependency>
          <groupId>jdk.tools</groupId>
          <artifactId>jdk.tools</artifactId>
          <version>1.8</version>
          <scope>system</scope>
          <systemPath>${JAVA_HOME}/lib/tools.jar</systemPath>
        </dependency>
      </dependencies>
      <build>
        <finalName>Hadoop</finalName>
        <plugins>
          <plugin>
            <artifactId>maven-compiler-plugin</artifactId>
            <configuration>
              <source>1.8</source>
              <target>1.8</target>
              <encoding>UTF-8</encoding>
            </configuration>
          </plugin>
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-resources-plugin</artifactId>
            <configuration>
              <encoding>UTF-8</encoding>
            </configuration>
          </plugin>
        </plugins>
      </build>
    </project>

    The hadoop-mapreduce-client-jobclient dependency is not strictly required here (the example runs without it), but it is needed when submitting the job to a remote cluster.

    log4j.properties (placed under src/main/resources so it ends up on the classpath; log4j looks for this exact file name by default):

    log4j.rootLogger=DEBUG,stdout,R
     
    log4j.appender.stdout=org.apache.log4j.ConsoleAppender 
    log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 
    log4j.appender.stdout.layout.ConversionPattern=%5p - %m%n
     
    log4j.appender.R=org.apache.log4j.RollingFileAppender 
    log4j.appender.R.File=mapreduce_test.log 
    log4j.appender.R.MaxFileSize=1MB 
    log4j.appender.R.MaxBackupIndex=1 
    log4j.appender.R.layout=org.apache.log4j.PatternLayout 
    log4j.appender.R.layout.ConversionPattern=%p %t %c - %m%n 
    log4j.logger.com.codefutures=INFO 

    With the code above you can run a word count over word.txt (the "words" here are single letters). The file looks like this:

    w e r t t t y y u 

    d g h j k k l d f 
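    For reference, assuming the input file contains exactly the two lines above, the job output (file part-r-00000 under /lzh/wcut, with key and count separated by a tab) should look roughly like this:

    d	2
    e	1
    f	1
    g	1
    h	1
    j	1
    k	2
    l	1
    r	1
    t	3
    u	1
    w	1
    y	2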

    That completes a simple local run of a MapReduce job. Give it a try!

  • Original post: https://www.cnblogs.com/bornteam/p/6554039.html