zoukankan      html  css  js  c++  java
  • 【大数据系列】MapReduce示例好友推荐

    package org.slp;
    
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    
    import java.io.IOException;
    import java.util.StringTokenizer;
    
    /**
     * Mapper of the friend-recommendation example.
     *
     * <p>Each input line describes one friendship as two names separated by a
     * tab. The relation is emitted in both directions, so the reduce call for a
     * person receives every name that appeared next to that person in the input.
     *
     * <p>Created by sanglp on 2017/7/17.
     */
    public class Test2Mapper extends Mapper<LongWritable, Text, Text, Text> {

        /** Counter group used to report input lines that could not be parsed. */
        private static final String COUNTER_GROUP = "Test2Mapper";

        /** Counter name for skipped (malformed) input lines. */
        private static final String MALFORMED_LINES = "MALFORMED_LINES";

        /**
         * Emits the pair (a, b) and its mirror (b, a) for one input line "a&lt;TAB&gt;b".
         *
         * @param key     input key supplied by the framework; not used here
         * @param value   one line of input text
         * @param context sink for the two emitted pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // One line of data represents one friendship.
            String[] ss = value.toString().split("\t");

            // A blank line, a line without a tab, or a line with an empty name
            // cannot form a pair. Skip it (and count it) instead of failing the
            // whole task with an ArrayIndexOutOfBoundsException.
            if (ss.length < 2 || ss[0].isEmpty() || ss[1].isEmpty()) {
                context.getCounter(COUNTER_GROUP, MALFORMED_LINES).increment(1);
                return;
            }

            // Write the relation in both directions.
            context.write(new Text(ss[0]), new Text(ss[1]));
            context.write(new Text(ss[1]), new Text(ss[0]));
        }
    }
    package org.slp;
    
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer;
    
    import java.io.IOException;
    import java.util.HashSet;
    import java.util.Iterator;
    import java.util.Set;
    
    /**
     * Reducer of the friend-recommendation example.
     *
     * <p>For one key it collects the distinct values and, when there are at least
     * two of them, writes every ordered pair of different values. With the
     * mapper of this example the key is a person and the values are that
     * person's direct friends, so each written pair is two people who share the
     * key as a common friend.
     *
     * <p>NOTE(review): pairs are written even if the two people are already
     * direct friends, and the same pair can be produced by several keys; no
     * de-duplication happens here.
     *
     * <p>Created by sanglp on 2017/7/17.
     */
    public class Test2Reduce extends Reducer<Text, Text, Text, Text> {

        /**
         * Writes all ordered pairs of distinct values received for {@code key}.
         *
         * @param key     the grouping key; it is not part of the output
         * @param values  all values emitted for {@code key}
         * @param context sink for the generated pairs
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Copy the values into a set of Strings to drop duplicates.
            Set<String> friends = new HashSet<String>();
            for (Text t : values) {
                friends.add(t.toString());
            }

            // Fewer than two distinct values cannot form a pair.
            if (friends.size() <= 1) {
                return;
            }

            // Typed for-each loops replace the raw Iterator + cast of the
            // original; the emitted pairs and their order are the same.
            for (String name : friends) {
                for (String other : friends) {
                    if (!name.equals(other)) {
                        context.write(new Text(name), new Text(other));
                    }
                }
            }
        }
    }
    package org.slp;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    import java.io.IOException;
    
    /**
     * Created by sanglp on 2017/7/17.
     */
    public class JobRun2 {
    
        public static void main(String[] args){
            Configuration conf = new Configuration();
            conf.set("mapred.job.tracker","node1:9001");
            conf.set("mapred.job.tracker","node1:9001");
            conf.set("mapred.jar","C:\Users\sanglp\qq.jar");
            try {
                Job job = new Job(conf);
                job.setJobName("qq");
                job.setJarByClass(JobRun2.class);
                job.setMapperClass(Test2Mapper.class);
                job.setReducerClass(Test2Reduce.class);
                job.setMapOutputKeyClass(Text.class);
                job.setMapOutputValueClass(Text.class);
    
                job.setNumReduceTasks(1);//设置reduce任务的个数
                //mapreduce输入数据所在目录或文件
                FileInputFormat.addInputPath(job,new Path("/usr/input/qq"));
                //mr执行之后的输出数据的目录
                FileOutputFormat.setOutputPath(job,new Path("/usr/out/qq"));
                try {
                    System.exit(job.waitForCompletion(true)?0:1);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                } catch (ClassNotFoundException e) {
                    e.printStackTrace();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    文件内容例如(每行表示一组好友关系,两个名字之间用制表符 Tab 分隔):

    小明  小李

    小花  小白

  • 相关阅读:
    A program file was not specified in the launch configuration.
    Effective C++条款38: 决不要重新定义继承而来的缺省参数值
    进程控制块
    Effective C++条款37: 决不要重新定义继承而来的非虚函数
    Effective C++条款42: 明智地使用私有继承
    迭代的是人,递归的是神(第一篇——递归调用的分析)
    Effective C++条款36: 区分接口继承和实现继承
    进程上下文
    进程的层次结构 ——进程组捕捉信号
    SQL语句的并集UNION,交集JOIN(内连接,外连接),交叉连接(CROSS JOIN笛卡尔积),差集(NOT IN)
  • 原文地址:https://www.cnblogs.com/dream-to-pku/p/7203448.html
Copyright © 2011-2022 走看看