  • Getting started with MapReduce

    First, prepare the program code.

    import java.io.IOException;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {
    
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Receive one line of input
            String line = value.toString();
            // Split the line into words
            String[] words = line.split(" ");
            // Loop over the words
            for (String w : words) {
                // Each occurrence counts as one: emit (word, 1)
                context.write(new Text(w), new LongWritable(1));
            }
        }
    
    }
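    
    For example, given the input line "hello world hello", this mapper emits (hello, 1), (world, 1), and (hello, 1): one (word, 1) pair per occurrence.
    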
    import java.io.IOException;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class WCReduce extends Reducer<Text, LongWritable, Text, LongWritable> {
    
        @Override
        protected void reduce(Text key, Iterable<LongWritable> v2s, Context context)
                throws IOException, InterruptedException {
            // Receive the data: define a counter for this word
            long counter = 0;
            // Loop over v2s, accumulating the counts
            for (LongWritable i : v2s) {
                counter += i.get(); // get() returns the underlying long
            }
            // Emit (word, total count)
            context.write(key, new LongWritable(counter));
        }
    
    }
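    
    Continuing the example, the reducer receives (hello, [1, 1]) and (world, [1]) and emits (hello, 2) and (world, 1).
    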
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class WordCount {
        public static void main(String[] args) throws Exception {
            Job job = Job.getInstance(new Configuration());
    
            // The class that contains main, so Hadoop can locate the jar
            job.setJarByClass(WordCount.class);
    
            // Set the mapper, its output key/value types, and the input path
            job.setMapperClass(WCMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);
            FileInputFormat.setInputPaths(job, new Path("/words.txt"));
    
            // Set the reducer-related properties and the output path
            job.setReducerClass(WCReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            FileOutputFormat.setOutputPath(job, new Path("/wcout666"));
    
            // Submit the job and report progress until it completes
            job.waitForCompletion(true);
        }
    
    }
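    
    An optional addition that is not in the original driver: because word-count summation is associative, the reducer can double as a map-side combiner, pre-aggregating counts before the shuffle:
    
        // Hypothetical extra line for the driver above: reuse WCReduce as a combiner
        job.setCombinerClass(WCReduce.class);
    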
    <dependencies>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-common</artifactId>
                <version>2.9.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-hdfs</artifactId>
                <version>2.9.0</version>
            </dependency>
    
            <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-mapreduce-client-core</artifactId>
                <version>2.9.0</version>
            </dependency>
    
        </dependencies>
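    
    As an alternative to listing the three artifacts separately, the single hadoop-client artifact (not used in the original, but available in Maven Central) pulls in the common, HDFS, and MapReduce client dependencies:
    
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.9.0</version>
        </dependency>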

    The waitForCompletion(true) call at the end of main submits the job; passing true tells it to print progress and status information to the user as the job runs.
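    
    waitForCompletion also returns whether the job succeeded, so a common pattern (an addition, not in the original code) is to surface that as the process exit code:
    
        boolean success = job.waitForCompletion(true);
        System.exit(success ? 0 : 1);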

    The JDK version used to compile the code must match the JDK version on the cluster.
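    
    With Maven this can be pinned in the pom.xml; a minimal sketch, assuming the cluster runs Java 8 (adjust the version to your cluster):
    
        <properties>
            <maven.compiler.source>1.8</maven.compiler.source>
            <maven.compiler.target>1.8</maven.compiler.target>
        </properties>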

    Packaging the jar (Maven or IDEA)
    
    IDEA
    
    First add the artifact definition:
    File ------> Project Structure ------> Artifacts
    
    Then build it:
    Build ------> Build Artifacts ------> Build, which generates the jar
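    
    With Maven (assuming a standard project layout and the pom.xml above), packaging is a single command that drops the jar under target/:
    
        mvn clean package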

    Run
    
    hadoop jar xxxx.jar WordCount
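    
    A fuller sketch of a run, assuming the jar is named wc.jar and the input sits in a local file words.txt (both names hypothetical):
    
        # Stage the input in HDFS at the path the driver expects
        hdfs dfs -put words.txt /words.txt
        # Submit the job, naming the main class
        hadoop jar wc.jar WordCount
        # Inspect the output written by the single reducer
        hdfs dfs -cat /wcout666/part-r-00000
    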
    RUSH B