  • MapReduce: finding common friends

    The input below has one line per person, in the form person:friend1,friend2,…:

    A:B,C,D,F,E,O
    B:A,C,E,K
    C:F,A,D,I
    D:A,E,F,L
    E:B,C,D,M,L
    F:A,B,C,D,E,O,M
    G:A,C,D,E,F
    H:A,C,D,E,O
    I:A,O
    J:B,O
    K:A,C,D
    L:D,E,F
    M:E,F,G
    O:A,H,I,J
     
    Find every pair of people who share at least one common friend, and list who each pair's common friends are.
    For example A-B:C,E
    A-E:B,C,D

    A tempting but wrong reading: "E is in A's friend list, so A is also in E's list, and the friends that A and E both have are their common friends." Following that thought makes the problem look simple, but it is incorrect, because friendship here is one-way (A's list contains E, yet E's list does not contain A), so the expected pair A-E:B,C,D would never be produced.

    The correct reading: for every pair of people, find the friends that appear in both of their lists. Whether the two people appear in each other's lists is irrelevant.

    Without MapReduce, the program would first split out all the people, then loop over every pairwise combination, merge the two friend lists, and keep the friends that occur in both; those are the pair's common friends. This is also how a person would find them by hand; a single-machine sketch of it follows below.
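
    As a sanity check, here is a minimal single-machine sketch of that manual approach (plain Java, no Hadoop; the class name CommonFriendsLocal and the inlined data are for illustration only):

    import java.util.*;

    public class CommonFriendsLocal {
        public static void main(String[] args) {
            String[] lines = {
                "A:B,C,D,F,E,O", "B:A,C,E,K", "C:F,A,D,I", "D:A,E,F,L",
                "E:B,C,D,M,L", "F:A,B,C,D,E,O,M", "G:A,C,D,E,F", "H:A,C,D,E,O",
                "I:A,O", "J:B,O", "K:A,C,D", "L:D,E,F", "M:E,F,G", "O:A,H,I,J"
            };
            // parse "person:friend,friend,..." into person -> friend set
            Map<String, Set<String>> friends = new TreeMap<>();
            for (String line : lines) {
                String[] parts = line.split(":");
                friends.put(parts[0], new TreeSet<>(Arrays.asList(parts[1].split(","))));
            }
            // enumerate every unordered pair and intersect their friend sets
            List<String> people = new ArrayList<>(friends.keySet());
            for (int i = 0; i < people.size(); i++) {
                for (int j = i + 1; j < people.size(); j++) {
                    Set<String> common = new TreeSet<>(friends.get(people.get(i)));
                    common.retainAll(friends.get(people.get(j)));
                    // pairs print in ascending order (A-B); the jobs below
                    // happen to key them the other way round (B-A)
                    if (!common.isEmpty()) {
                        System.out.println(people.get(i) + "-" + people.get(j)
                                + ":" + String.join(",", common));
                    }
                }
            }
        }
    }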

    In MapReduce, however, each map call sees only one input line and cannot read the others. To relate data across lines you must pick a key so that the shuffle groups records from different lines together; only then can a reducer see them side by side.
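
    The key used in the code below is the friend itself. For example, the first job's mapper turns the line A:B,C,D,F,E,O into

        (B, A)  (C, A)  (D, A)  (F, A)  (E, A)  (O, A)

    so after the shuffle the key B has gathered everyone whose list contains B, namely A, E, F and J (compare the B line in the first output listing at the end).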

    If the two roles still feel easy to confuse, a change of vocabulary helps:

    tom: apple,pear,banana,waterball

    jerry:apple,pear

    jack:banana,apple

    Which pairs of people have fruit in common, and what is each pair's full list of shared fruit? Phrased this way, nobody confuses the two roles. What comes up at work is the person-and-friend version, but boldly abstracting friends into fruit is exactly the kind of reformulation the work calls for.
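
    Worked by hand, the three lines above give:

        tom-jerry: apple,pear
        tom-jack: apple,banana
        jerry-jack: apple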

    The answer follows as two chained MapReduce jobs.

    package my.hadoop.hdfs.findFriend;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
     
    public class FindCommonFriendOne {
         
        public static class FindFriendMapper extends
                Mapper<LongWritable, Text, Text, Text> {
            // The generics declare the input and output key/value types.
            // For every friend in a person's list, emit (friend, person),
            // so the shuffle groups under each friend all the people
            // whose lists contain that friend.
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                // the line the map task hands us, e.g. "A:B,C,D,F,E,O"
                String line = value.toString();
                // split the person (QQ id) from the friend list on ':'
                String[] qqAndFriend = line.split(":");
                String qq = qqAndFriend[0];
                String[] friends = qqAndFriend[1].split(",");
                for (String friend : friends) {
                    context.write(new Text(friend), new Text(qq));
                }
            }
        }
    
     
        public static class FindFriendReducer extends
                Reducer<Text, Text, Text, Text> {
            // Concatenate, for one friend, every person who lists that
            // friend, e.g. key "B" -> value "A,F,J,E,"
            @Override
            protected void reduce(Text keyin, Iterable<Text> values,
                    Context context) throws IOException, InterruptedException {
                StringBuilder qqs = new StringBuilder();
                for (Text val : values) {
                    qqs.append(val.toString()).append(",");
                }
                context.write(keyin, new Text(qqs.toString()));
            }
        }
    
     
        public static void main(String[] args) throws IOException,
                ClassNotFoundException, InterruptedException {
            
            Configuration configuration = new Configuration();
            Job job = Job.getInstance(configuration);
            job.setJarByClass(FindCommonFriendOne.class);
            
            job.setMapperClass(FindFriendMapper.class);
            job.setReducerClass(FindFriendReducer.class);
            // declare the job's final output key/value types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            boolean res = job.waitForCompletion(true);
            System.exit(res ? 0 :1);
        }
    
    }
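
    Stage one inverts the relation: each line of its output reads "friend<TAB>people whose lists contain that friend" (see the first listing at the end). Assuming the classes are packed into a jar, it could be launched like this (the jar name and input path are illustrative; the output path matches the listing):

        hadoop jar findfriend.jar my.hadoop.hdfs.findFriend.FindCommonFriendOne /findfriend/input /findfriend/output/tes0

    The second job then takes that output as its input: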
    package my.hadoop.hdfs.findFriend;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
     
    public class FindCommonFriendTwo {
     
        public static class FindFriendMapper extends
                Mapper<LongWritable, Text, Text, Text> {
            // Input:  one line of stage one's output, e.g. "B\tA,F,J,E,"
            // Output: (person1-person2, friend) for every pair of people
            //         who both have this friend in their lists.
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                String line = value.toString();
                // stage one's output is tab-separated: friend, then people
                String[] friendAndQQ = line.split("\t");
                String friend = friendAndQQ[0];
                String[] qqs = friendAndQQ[1].split(",");
                for (int i = 0; i < qqs.length; i++) {
                    for (int j = i + 1; j < qqs.length; j++) {
                        // order each pair consistently, so that A-D and
                        // D-A land on the same key
                        if (qqs[i].compareTo(qqs[j]) > 0) {
                            context.write(new Text(qqs[i] + "-" + qqs[j]), new Text(friend));
                        } else {
                            context.write(new Text(qqs[j] + "-" + qqs[i]), new Text(friend));
                        }
                    }
                }
            }
        }
    
     
        public static class FindFriendReducer extends
                Reducer<Text, Text, Text, Text> {
            // Concatenate the common friends of one pair, skipping
            // duplicates, e.g. key "E-A" -> value "D,C,B,"
            @Override
            protected void reduce(Text keyin, Iterable<Text> values,
                    Context context) throws IOException, InterruptedException {
                StringBuffer friends = new StringBuffer();
                for (Text val : values) {
                    // append each friend only once per pair
                    if (friends.indexOf(val.toString()) < 0) {
                        friends.append(val).append(",");
                    }
                }
                context.write(keyin, new Text(friends.toString()));
            }
        }
    
     
        public static void main(String[] args) throws IOException,
                ClassNotFoundException, InterruptedException {
            
            Configuration configuration = new Configuration();
            Job job = Job.getInstance(configuration);
            job.setJarByClass(FindCommonFriendTwo.class);
            
            job.setMapperClass(FindFriendMapper.class);
            job.setReducerClass(FindFriendReducer.class);
            // declare the job's final output key/value types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            boolean res = job.waitForCompletion(true);
            System.exit(res ? 0 :1);
        }
    
    }
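
    Stage two runs on stage one's output (again, the jar name is illustrative):

        hadoop jar findfriend.jar my.hadoop.hdfs.findFriend.FindCommonFriendTwo /findfriend/output/tes0 /findfriend/output/tes2

    Each line of its result reads "pair<TAB>common friends". Note that the pairs come out keyed in the reverse order of the examples above, so the expected A-B:C,E appears below as B-A E,C,. Both results: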
    [hadoop@mini2 study]$ hadoop fs -cat /findfriend/output/tes0/*
    A    I,K,C,B,G,F,H,O,D,
    B    A,F,J,E,
    C    A,E,B,H,F,G,K,
    D    G,C,K,A,L,F,E,H,
    E    G,M,L,H,A,F,B,D,
    F    L,M,D,C,G,A,
    G    M,
    H    O,
    I    O,C,
    J    O,
    K    B,
    L    D,E,
    M    E,F,
    O    A,H,I,J,F,
    [hadoop@mini2 study]$ hadoop fs -cat /findfriend/output/tes2/*
    B-A    E,C,
    C-A    F,D,
    C-B    A,
    D-A    E,F,
    D-B    A,E,
    D-C    F,A,
    E-A    D,C,B,
    E-B    C,
    E-C    D,
    E-D    L,
    F-A    C,O,D,E,B,
    F-B    C,A,E,
    F-C    A,D,
    F-D    E,A,
    F-E    C,B,M,D,
    G-A    E,D,C,F,
    G-B    E,A,C,
    G-C    D,F,A,
    G-D    A,E,F,
    G-E    D,C,
    G-F    C,A,E,D,
    H-A    O,E,C,D,
    H-B    E,C,A,
    H-C    D,A,
    H-D    E,A,
    H-E    C,D,
    H-F    C,D,A,E,O,
    H-G    C,A,E,D,
    I-A    O,
    I-B    A,
    I-C    A,
    I-D    A,
    I-F    A,O,
    I-G    A,
    I-H    A,O,
    J-A    B,O,
    J-E    B,
    J-F    O,B,
    J-H    O,
    J-I    O,
    K-A    D,C,
    K-B    A,C,
    K-C    D,A,
    K-D    A,
    K-E    C,D,
    K-F    D,C,A,
    K-G    D,C,A,
    K-H    C,D,A,
    K-I    A,
    L-A    E,D,F,
    L-B    E,
    L-C    D,F,
    L-D    F,E,
    L-E    D,
    L-F    D,E,
    L-G    E,F,D,
    L-H    E,D,
    L-K    D,
    M-A    F,E,
    M-B    E,
    M-C    F,
    M-D    F,E,
    M-F    E,
    M-G    E,F,
    M-H    E,
    M-L    E,F,
    O-B    A,
    O-C    I,A,
    O-D    A,
    O-F    A,
    O-G    A,
    O-H    A,
    O-I    A,
    O-K    A,


  • Original post: https://www.cnblogs.com/rocky-AGE-24/p/6908254.html