zoukankan      html  css  js  c++  java
  • Hadoop实例之寻找博客中共同好友

    以下是博客的好友列表数据,冒号前是一个用户,冒号后是该用户的所有好友(数据中的好友关系是单向的)

    求出哪些人两两之间有共同好友,及他俩的共同好友都有谁?

    A:B,C,D,F,E,O
    B:A,C,E,K
    C:F,A,D,I
    D:A,E,F,L
    E:B,C,D,M,L
    F:A,B,C,D,E,O,M
    G:A,C,D,E,F
    H:A,C,D,E,O
    I:A,O
    J:B,O
    K:A,C,D
    L:D,E,F
    M:E,F,G
    O:A,H,I,J

    首先要求出 A、B、C……等人分别是谁的好友

    package mapreduce;
    
    import java.io.IOException;
    import java.net.URI;
    import java.net.URISyntaxException;
    
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    /**
     * Common-friends computation, step 1 (2019.9.3): inverts the friend lists so that
     * every output record tells, for one person, which users list that person as a friend.
     *
     * <p>Input lines have the form {@code A:B,C,D,F,E,O} (user, colon, comma-separated
     * friends). Output records have the form {@code B<TAB>A,F,J,E,} - the key is a person
     * and the value is the comma-terminated list of users who have that person as a friend.
     */
    public class FindFriend {

        /** Input file used when no command-line argument is given. */
        private static final String DEFAULT_INPUT_PATH = "hdfs://192.168.68.130:9000/user/hadoop/blogfriend.txt";

        /** Output directory used when no second command-line argument is given. */
        private static final String DEFAULT_OUTPUT_PATH = "hdfs://192.168.68.130:9000/user/hadoop/output";

        /**
         * Emits one {@code (friend, user)} pair for every friend found on a
         * {@code user:friend1,friend2,...} line.
         */
        public static class FindFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {
            // Reusable output buffers; per-instance so that mapper instances never share state.
            private final Text peoples = new Text(); // output key: the friend
            private final Text friends = new Text(); // output value: the user owning the list

            /**
             * Splits one input line and writes {@code (friend, user)} for each listed friend.
             *
             * @param key     offset of the line in the input (unused)
             * @param value   one line such as {@code A:B,C,D,F,E,O}
             * @param context sink for the emitted pairs
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                    throws IOException, InterruptedException {
                String[] array = value.toString().split(":");
                if (array.length < 2) {
                    // Blank line or a user without any friend: nothing to emit.
                    return;
                }
                String user = array[0].trim();
                if (user.isEmpty()) {
                    return;
                }
                friends.set(user);
                for (String entry : array[1].split(",")) {
                    String friend = entry.trim();
                    if (friend.isEmpty()) {
                        continue; // tolerate stray commas such as "A:B,,C"
                    }
                    // For the sample line this writes key B, value A: "B is a friend of A".
                    peoples.set(friend);
                    context.write(peoples, friends);
                }
            }
        }

        /**
         * Concatenates all users that share the same friend into one comma-terminated string.
         */
        public static class FindFriendsReduce extends Reducer<Text, Text, Text, Text> {
            /**
             * Writes {@code (person, "user1,user2,...,")} for one person.
             *
             * @param key     the person
             * @param values  every user who lists {@code key} as a friend
             * @param context sink for the emitted record
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
                    throws IOException, InterruptedException {
                StringBuilder joined = new StringBuilder();
                for (Text value : values) {
                    // The trailing comma is kept on purpose: it is part of the established output format.
                    joined.append(value.toString()).append(',');
                }
                context.write(key, new Text(joined.toString()));
            }
        }

        /**
         * Configures and runs the step-1 job, then exits with 0 on success and 1 on failure.
         * An already existing output directory is deleted recursively before the job starts.
         *
         * @param args optional: {@code args[0]} input path, {@code args[1]} output directory;
         *             the built-in HDFS locations are used for whichever is missing
         * @throws IOException            if the file system or the job cannot be accessed
         * @throws ClassNotFoundException if a job class cannot be loaded
         * @throws InterruptedException   if waiting for the job is interrupted
         * @throws URISyntaxException     if the output path is not a valid URI
         */
        public static void main(String[] args)
                throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
            final String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
            final String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

            Configuration conf = new Configuration();
            // Resolve the file system from the OUTPUT location, since that is what gets deleted.
            final FileSystem fileSystem = FileSystem.get(new URI(outputPath), conf);
            final Path outputDir = new Path(outputPath);
            if (fileSystem.exists(outputDir)) {
                fileSystem.delete(outputDir, true);
            }

            Job job = Job.getInstance(conf, "Find_Friend"); // user-visible job name
            job.setJarByClass(FindFriend.class); // locate the jar through this job's own class
            job.setMapperClass(FindFriendsMapper.class);
            job.setReducerClass(FindFriendsReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileInputFormat.addInputPath(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, outputDir);

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }

    }

    该段程序运行完成后,得到的结果应为:

    A    I,K,C,B,G,F,H,O,D,
    B    A,F,J,E,
    C    A,E,B,H,F,G,K,
    D    G,C,K,A,L,F,E,H,
    E    G,M,L,H,A,F,B,D,
    F    L,M,D,C,G,A,
    G    M,
    H    O,
    I    O,C,
    J    O,
    K    B,
    L    D,E,
    M    E,F,
    O    A,H,I,J,F,

    接下来进行第二步,得到哪些人两两之间有共同好友,以及他俩的共同好友都有谁

    package mapreduce;
    
    import java.io.IOException;
    import java.net.URI;
    import java.net.URISyntaxException;
    import java.util.HashSet;
    import java.util.Set;
    
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
    /**
     * Common-friends computation, step 2: reads the step-1 output and reports, for every
     * pair of users that share at least one friend, who those shared friends are.
     *
     * <p>Input records have the form {@code A<TAB>I,K,C,B,G,F,H,O,D,} (a person, a tab, then
     * the users who list that person as a friend). Output records have the form
     * {@code [I, K]<TAB>A O } - the key is an ordered pair of users and the value is the
     * space-terminated list of their common friends.
     */
    public class Find_Friend {

        /** Input file used when no command-line argument is given (the step-1 result). */
        private static final String DEFAULT_INPUT_PATH = "hdfs://192.168.68.130:9000/user/hadoop/output/part-r-00000";

        /** Output directory used when no second command-line argument is given. */
        private static final String DEFAULT_OUTPUT_PATH = "hdfs://192.168.68.130:9000/user/hadoop/output8";

        /**
         * Emits {@code ([userX, userY], person)} for every two users found on the same
         * step-1 record, i.e. for every two users who both have {@code person} as a friend.
         */
        public static class FindFriendsMapper extends Mapper<LongWritable, Text, Text, Text> {
            // Reusable output buffers; per-instance so that mapper instances never share state.
            private final Text peoples = new Text(); // output key: the ordered user pair
            private final Text friends = new Text(); // output value: their common friend

            /**
             * Splits one step-1 record and writes one pair record per two distinct users.
             *
             * @param key     offset of the line in the input (unused)
             * @param value   one record such as {@code A<TAB>I,K,C,B,G,F,H,O,D,}
             * @param context sink for the emitted pairs
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context)
                    throws IOException, InterruptedException {
                String line = value.toString();
                // Step 1 writes its records through the default text output, which puts a tab
                // between key and value.
                int separator = line.indexOf('\t');
                if (separator < 0) {
                    return; // blank or malformed line: nothing to emit
                }
                String[] friend = line.substring(separator + 1).split(",");
                friends.set(line.substring(0, separator));

                // Example: I and K share A; I and C share A; ...
                for (int i = 0; i < friend.length - 1; i++) {
                    if (friend[i].isEmpty()) {
                        continue;
                    }
                    for (int j = i + 1; j < friend.length; j++) {
                        int order = friend[i].compareTo(friend[j]);
                        if (friend[j].isEmpty() || order == 0) {
                            continue; // a user does not form a pair with itself
                        }
                        // Always put the lexicographically smaller name first so that A-C and
                        // C-A collapse into the same key; a hash set gives no such ordering guarantee.
                        String first = order < 0 ? friend[i] : friend[j];
                        String second = order < 0 ? friend[j] : friend[i];
                        peoples.set("[" + first + ", " + second + "]");
                        context.write(peoples, friends);
                    }
                }
            }
        }

        /**
         * Concatenates all common friends of one user pair into one space-terminated string.
         */
        public static class FindFriendsReduce extends Reducer<Text, Text, Text, Text> {
            /**
             * Writes {@code ([userX, userY], "friend1 friend2 ... ")} for one user pair.
             *
             * @param key     the ordered user pair
             * @param values  every common friend of that pair
             * @param context sink for the emitted record
             * @throws IOException          if writing to the context fails
             * @throws InterruptedException if the task is interrupted while writing
             */
            @Override
            protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Text>.Context context)
                    throws IOException, InterruptedException {
                StringBuilder joined = new StringBuilder();
                for (Text value : values) {
                    // The trailing space is kept on purpose: it is part of the established output format.
                    joined.append(value.toString()).append(' ');
                }
                context.write(key, new Text(joined.toString()));
            }
        }

        /**
         * Configures and runs the step-2 job, then exits with 0 on success and 1 on failure.
         * An already existing output directory is deleted recursively before the job starts.
         *
         * @param args optional: {@code args[0]} input path, {@code args[1]} output directory;
         *             the built-in HDFS locations are used for whichever is missing
         * @throws IOException            if the file system or the job cannot be accessed
         * @throws ClassNotFoundException if a job class cannot be loaded
         * @throws InterruptedException   if waiting for the job is interrupted
         * @throws URISyntaxException     if the output path is not a valid URI
         */
        public static void main(String[] args)
                throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
            final String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
            final String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

            Configuration conf = new Configuration();
            // Resolve the file system from the OUTPUT location, since that is what gets deleted.
            final FileSystem fileSystem = FileSystem.get(new URI(outputPath), conf);
            final Path outputDir = new Path(outputPath);
            if (fileSystem.exists(outputDir)) {
                fileSystem.delete(outputDir, true);
            }

            Job job = Job.getInstance(conf, "Find_Friend"); // user-visible job name
            job.setJarByClass(Find_Friend.class);
            job.setMapperClass(FindFriendsMapper.class);
            job.setReducerClass(FindFriendsReduce.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            FileInputFormat.addInputPath(job, new Path(inputPath));
            FileOutputFormat.setOutputPath(job, outputDir);

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }

    }

    最终得到的结果为:

    [A, B]    C E 
    [A, C]    D F 
    [A, D]    F E 
    [A, E]    B D C 
    [A, F]    C B O D E 
    [A, G]    F D E C 
    [A, H]    D O E C 
    [A, I]    O 
    [A, J]    O B 
    [A, K]    D C 
    [A, L]    E D F 
    [A, M]    F E 
    [B, C]    A 
    [B, D]    E A 
    [B, E]    C 
    [B, F]    E A C 
    [B, G]    C A E 
    [B, H]    E C A 
    [B, I]    A 
    [B, K]    C A 
    [B, L]    E 
    [B, M]    E 
    [B, O]    A 
    [C, D]    F A 
    [C, E]    D 
    [C, F]    D A 
    [C, G]    D F A 
    [C, H]    A D 
    [C, I]    A 
    [C, K]    D A 
    [C, L]    D F 
    [C, M]    F 
    [C, O]    I A 
    [D, E]    L 
    [D, F]    A E 
    [D, G]    E A F 
    [D, H]    E A 
    [D, I]    A 
    [D, K]    A 
    [D, L]    E F 
    [D, M]    E F 
    [D, O]    A 
    [E, F]    B M D C 
    [E, G]    D C 
    [E, H]    D C 
    [E, J]    B 
    [E, K]    D C 
    [E, L]    D 
    [F, G]    C D A E 
    [F, H]    A E O C D 
    [F, I]    O A 
    [F, J]    O B 
    [F, K]    A C D 
    [F, L]    E D 
    [F, M]    E 
    [F, O]    A 
    [G, H]    A D E C 
    [G, I]    A 
    [G, K]    C A D 
    [G, L]    F D E 
    [G, M]    F E 
    [G, O]    A 
    [H, I]    A O 
    [H, J]    O 
    [H, K]    A D C 
    [H, L]    D E 
    [H, M]    E 
    [H, O]    A 
    [I, J]    O 
    [I, K]    A 
    [I, O]    A 
    [K, L]    D 
    [K, O]    A 
    [L, M]    F E 
  • 相关阅读:
    SpringSecurity开发
    SpringBoot 集成Spring Security
    Hexo
    gitbook使用
    Maze
    Party
    A. DZY Loves Chessboard
    1042B. Vitamins
    Petr and a Combination Lock
    433B.Kuriyama Mirai's Stones
  • 原文地址:https://www.cnblogs.com/sakura--/p/11455643.html
Copyright © 2011-2022 走看看