  • Getting Started with MapReduce

    First, prepare the program code. Start with the Mapper:

    import java.io.IOException;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    public class WCMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

        // Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>: the input is a
        // (byte offset, line of text) pair; the output is a (word, count) pair
        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Receive the value: one line of input
            String line = value.toString();
            // Split the line into words
            String[] words = line.split(" ");
            // Loop over the words
            for (String w : words) {
                // Record a 1 for each occurrence and emit it
                context.write(new Text(w), new LongWritable(1));
            }
        }
    }

    Next, the Reducer:
    import java.io.IOException;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    public class WCReduce extends Reducer<Text, LongWritable, Text, LongWritable> {

        @Override
        protected void reduce(Text key, Iterable<LongWritable> v2s, Context context)
                throws IOException, InterruptedException {
            // Receive the data: define a counter for this key
            long counter = 0;
            // Loop over v2s, summing the counts
            for (LongWritable i : v2s) {
                counter += i.get(); // get() returns a long
            }
            // Emit (word, total count)
            context.write(key, new LongWritable(counter));
        }
    }

    Finally, the driver class, which configures and submits the job:
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    public class WordCount {
        public static void main(String[] args) throws Exception {
            Job job = Job.getInstance(new Configuration());

            // The class that contains main (used to locate the jar)
            job.setJarByClass(WordCount.class);

            // Mapper and its output key/value types
            job.setMapperClass(WCMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(LongWritable.class);
            FileInputFormat.setInputPaths(job, new Path("/words.txt"));

            // Reducer and the job's final output key/value types
            job.setReducerClass(WCReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(LongWritable.class);
            FileOutputFormat.setOutputPath(job, new Path("/wcout666"));

            // Submit the job and wait for it to finish
            job.waitForCompletion(true);
        }
    }

    The Maven dependencies in pom.xml:
    <dependencies>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-common</artifactId>
                <version>2.9.0</version>
            </dependency>
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-hdfs</artifactId>
                <version>2.9.0</version>
            </dependency>
    
            <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
            <dependency>
                <groupId>org.apache.hadoop</groupId>
                <artifactId>hadoop-mapreduce-client-core</artifactId>
                <version>2.9.0</version>
            </dependency>
    
        </dependencies>

    The last call in main, waitForCompletion(true), submits the job and blocks until it finishes; the true argument makes it print the job's progress and status to the user as it runs.
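
    A small optional refinement (not in the original code): waitForCompletion returns a boolean indicating whether the job succeeded, which can be turned into a process exit code so shell scripts can detect failure. A minimal sketch:

    // Exit with 0 on success, 1 on failure
    boolean success = job.waitForCompletion(true);
    System.exit(success ? 0 : 1);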

    The JDK version used to compile must match the JDK version on the cluster; a jar compiled with a newer JDK than the cluster runs can fail with an UnsupportedClassVersionError.
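
    If the project is built with Maven, one way to pin the compiler level is the maven-compiler-plugin in pom.xml (a sketch; the 1.8 value is an assumption, set it to whatever the cluster runs):

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>
        </plugins>
    </build>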

    Packaging with Maven / IDEA

    In IDEA, first add the artifact definition:

    File ------> Project Structure ------> Artifacts

    Then build it:

    Build ------> Build Artifacts ------> Build (generates the jar)
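
    Alternatively (assuming a standard Maven project layout), the jar can be built from the command line, landing under target/:

    mvn clean package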

    Run it on the cluster:

    hadoop jar xxxx.jar WordCount
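
    Once the job completes, the counts land in tab-separated part files under the output directory. As a sketch (the part-file name and the sample input are assumptions): if /words.txt contained "hello world hello", inspecting the output would look roughly like:

    hadoop fs -ls /wcout666
    hadoop fs -cat /wcout666/part-r-00000
    hello   2
    world   1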