  • MapReduce: finding common friends

    The input below has one line per person, in the form person:friend1,friend2,…:

    A:B,C,D,F,E,O
    B:A,C,E,K
    C:F,A,D,I
    D:A,E,F,L
    E:B,C,D,M,L
    F:A,B,C,D,E,O,M
    G:A,C,D,E,F
    H:A,C,D,E,O
    I:A,O
    J:B,O
    K:A,C,D
    L:D,E,F
    M:E,F,G
    O:A,H,I,J
     
    Find every pair of people who share at least one common friend, and list who each pair's common friends are.
    For example A-B:C,E
    A-E:B,C,D

    A tempting but wrong reading: "E is in A's friend list, so A is also in E's list, and the friends that A and E both have are their common friends." Following that thought makes the problem look simple, but it is incorrect, because friendship here is one-way (A's list contains E, yet E's list does not contain A), so the expected pair A-E:B,C,D would never be produced.

    The correct reading: for every pair of people, find the friends that appear in both of their lists. Whether the two people appear in each other's lists is irrelevant.

    Without MapReduce, the program would first split out all the people, then loop over every pairwise combination, merge the two friend lists, and keep the friends that occur in both; those are the pair's common friends. This is also how a person would find them by hand; a single-machine sketch of it follows below.
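
    As a sanity check, here is a minimal single-machine sketch of that manual approach (plain Java, no Hadoop; the class name CommonFriendsLocal and the inlined data are for illustration only):

    import java.util.*;

    public class CommonFriendsLocal {
        public static void main(String[] args) {
            String[] lines = {
                "A:B,C,D,F,E,O", "B:A,C,E,K", "C:F,A,D,I", "D:A,E,F,L",
                "E:B,C,D,M,L", "F:A,B,C,D,E,O,M", "G:A,C,D,E,F", "H:A,C,D,E,O",
                "I:A,O", "J:B,O", "K:A,C,D", "L:D,E,F", "M:E,F,G", "O:A,H,I,J"
            };
            // parse "person:friend,friend,..." into person -> friend set
            Map<String, Set<String>> friends = new TreeMap<>();
            for (String line : lines) {
                String[] parts = line.split(":");
                friends.put(parts[0], new TreeSet<>(Arrays.asList(parts[1].split(","))));
            }
            // enumerate every unordered pair and intersect their friend sets
            List<String> people = new ArrayList<>(friends.keySet());
            for (int i = 0; i < people.size(); i++) {
                for (int j = i + 1; j < people.size(); j++) {
                    Set<String> common = new TreeSet<>(friends.get(people.get(i)));
                    common.retainAll(friends.get(people.get(j)));
                    // pairs print in ascending order (A-B); the jobs below
                    // happen to key them the other way round (B-A)
                    if (!common.isEmpty()) {
                        System.out.println(people.get(i) + "-" + people.get(j)
                                + ":" + String.join(",", common));
                    }
                }
            }
        }
    }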

    In MapReduce, however, each map call sees only one input line and cannot read the others. To relate data across lines you must pick a key so that the shuffle groups records from different lines together; only then can a reducer see them side by side.
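
    The key used in the code below is the friend itself. For example, the first job's mapper turns the line A:B,C,D,F,E,O into

        (B, A)  (C, A)  (D, A)  (F, A)  (E, A)  (O, A)

    so after the shuffle the key B has gathered everyone whose list contains B, namely A, E, F and J (compare the B line in the first output listing at the end).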

    If the two roles still feel easy to confuse, a change of vocabulary helps:

    tom: apple,pear,banana,waterball

    jerry:apple,pear

    jack:banana,apple

    Which pairs of people have fruit in common, and what is each pair's full list of shared fruit? Phrased this way, nobody confuses the two roles. What comes up at work is the person-and-friend version, but boldly abstracting friends into fruit is exactly the kind of reformulation the work calls for.
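
    Worked by hand, the three lines above give:

        tom-jerry: apple,pear
        tom-jack: apple,banana
        jerry-jack: apple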

    The answer follows as two chained MapReduce jobs.

    package my.hadoop.hdfs.findFriend;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
    
     
    public class FindCommonFriendOne {
         
        public static class FindFriendMapper extends
                Mapper<LongWritable, Text, Text, Text> {
            // The generics declare the input and output key/value types.
            // For every friend in a person's list, emit (friend, person),
            // so the shuffle groups under each friend all the people
            // whose lists contain that friend.
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                // the line the map task hands us, e.g. "A:B,C,D,F,E,O"
                String line = value.toString();
                // split the person (QQ id) from the friend list on ':'
                String[] qqAndFriend = line.split(":");
                String qq = qqAndFriend[0];
                String[] friends = qqAndFriend[1].split(",");
                for (String friend : friends) {
                    context.write(new Text(friend), new Text(qq));
                }
            }
        }
    
     
        public static class FindFriendReducer extends
                Reducer<Text, Text, Text, Text> {
            // Concatenate, for one friend, every person who lists that
            // friend, e.g. key "B" -> value "A,F,J,E,"
            @Override
            protected void reduce(Text keyin, Iterable<Text> values,
                    Context context) throws IOException, InterruptedException {
                StringBuilder qqs = new StringBuilder();
                for (Text val : values) {
                    qqs.append(val.toString()).append(",");
                }
                context.write(keyin, new Text(qqs.toString()));
            }
        }
    
     
        public static void main(String[] args) throws IOException,
                ClassNotFoundException, InterruptedException {
            
            Configuration configuration = new Configuration();
            Job job = Job.getInstance(configuration);
            job.setJarByClass(FindCommonFriendOne.class);
            
            job.setMapperClass(FindFriendMapper.class);
            job.setReducerClass(FindFriendReducer.class);
            // declare the job's final output key/value types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            boolean res = job.waitForCompletion(true);
            System.exit(res ? 0 :1);
        }
    
    }
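
    Stage one inverts the relation: each line of its output reads "friend<TAB>people whose lists contain that friend" (see the first listing at the end). Assuming the classes are packed into a jar, it could be launched like this (the jar name and input path are illustrative; the output path matches the listing):

        hadoop jar findfriend.jar my.hadoop.hdfs.findFriend.FindCommonFriendOne /findfriend/input /findfriend/output/tes0

    The second job then takes that output as its input: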
    package my.hadoop.hdfs.findFriend;
    
    import java.io.IOException;
    
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
     
    public class FindCommonFriendTwo {
     
        public static class FindFriendMapper extends
                Mapper<LongWritable, Text, Text, Text> {
            // Input:  one line of stage one's output, e.g. "B\tA,F,J,E,"
            // Output: (person1-person2, friend) for every pair of people
            //         who both have this friend in their lists.
            @Override
            protected void map(LongWritable key, Text value, Context context)
                    throws IOException, InterruptedException {
                String line = value.toString();
                // stage one's output is tab-separated: friend, then people
                String[] friendAndQQ = line.split("\t");
                String friend = friendAndQQ[0];
                String[] qqs = friendAndQQ[1].split(",");
                for (int i = 0; i < qqs.length; i++) {
                    for (int j = i + 1; j < qqs.length; j++) {
                        // order each pair consistently, so that A-D and
                        // D-A land on the same key
                        if (qqs[i].compareTo(qqs[j]) > 0) {
                            context.write(new Text(qqs[i] + "-" + qqs[j]), new Text(friend));
                        } else {
                            context.write(new Text(qqs[j] + "-" + qqs[i]), new Text(friend));
                        }
                    }
                }
            }
        }
    
     
        public static class FindFriendReducer extends
                Reducer<Text, Text, Text, Text> {
            // Concatenate the common friends of one pair, skipping
            // duplicates, e.g. key "E-A" -> value "D,C,B,"
            @Override
            protected void reduce(Text keyin, Iterable<Text> values,
                    Context context) throws IOException, InterruptedException {
                StringBuffer friends = new StringBuffer();
                for (Text val : values) {
                    // append each friend only once per pair
                    if (friends.indexOf(val.toString()) < 0) {
                        friends.append(val).append(",");
                    }
                }
                context.write(keyin, new Text(friends.toString()));
            }
        }
    
     
        public static void main(String[] args) throws IOException,
                ClassNotFoundException, InterruptedException {
            
            Configuration configuration = new Configuration();
            Job job = Job.getInstance(configuration);
            job.setJarByClass(FindCommonFriendTwo.class);
            
            job.setMapperClass(FindFriendMapper.class);
            job.setReducerClass(FindFriendReducer.class);
            // declare the job's final output key/value types
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            
            FileInputFormat.setInputPaths(job, new Path(args[0]));
            
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            boolean res = job.waitForCompletion(true);
            System.exit(res ? 0 :1);
        }
    
    }
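
    Stage two runs on stage one's output (again, the jar name is illustrative):

        hadoop jar findfriend.jar my.hadoop.hdfs.findFriend.FindCommonFriendTwo /findfriend/output/tes0 /findfriend/output/tes2

    Each line of its result reads "pair<TAB>common friends". Note that the pairs come out keyed in the reverse order of the examples above, so the expected A-B:C,E appears below as B-A E,C,. Both results: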
    [hadoop@mini2 study]$ hadoop fs -cat /findfriend/output/tes0/*
    A    I,K,C,B,G,F,H,O,D,
    B    A,F,J,E,
    C    A,E,B,H,F,G,K,
    D    G,C,K,A,L,F,E,H,
    E    G,M,L,H,A,F,B,D,
    F    L,M,D,C,G,A,
    G    M,
    H    O,
    I    O,C,
    J    O,
    K    B,
    L    D,E,
    M    E,F,
    O    A,H,I,J,F,
    [hadoop@mini2 study]$ hadoop fs -cat /findfriend/output/tes2/*
    B-A    E,C,
    C-A    F,D,
    C-B    A,
    D-A    E,F,
    D-B    A,E,
    D-C    F,A,
    E-A    D,C,B,
    E-B    C,
    E-C    D,
    E-D    L,
    F-A    C,O,D,E,B,
    F-B    C,A,E,
    F-C    A,D,
    F-D    E,A,
    F-E    C,B,M,D,
    G-A    E,D,C,F,
    G-B    E,A,C,
    G-C    D,F,A,
    G-D    A,E,F,
    G-E    D,C,
    G-F    C,A,E,D,
    H-A    O,E,C,D,
    H-B    E,C,A,
    H-C    D,A,
    H-D    E,A,
    H-E    C,D,
    H-F    C,D,A,E,O,
    H-G    C,A,E,D,
    I-A    O,
    I-B    A,
    I-C    A,
    I-D    A,
    I-F    A,O,
    I-G    A,
    I-H    A,O,
    J-A    B,O,
    J-E    B,
    J-F    O,B,
    J-H    O,
    J-I    O,
    K-A    D,C,
    K-B    A,C,
    K-C    D,A,
    K-D    A,
    K-E    C,D,
    K-F    D,C,A,
    K-G    D,C,A,
    K-H    C,D,A,
    K-I    A,
    L-A    E,D,F,
    L-B    E,
    L-C    D,F,
    L-D    F,E,
    L-E    D,
    L-F    D,E,
    L-G    E,F,D,
    L-H    E,D,
    L-K    D,
    M-A    F,E,
    M-B    E,
    M-C    F,
    M-D    F,E,
    M-F    E,
    M-G    E,F,
    M-H    E,
    M-L    E,F,
    O-B    A,
    O-C    I,A,
    O-D    A,
    O-F    A,
    O-G    A,
    O-H    A,
    O-I    A,
    O-K    A,


  • Original post: https://www.cnblogs.com/rocky-AGE-24/p/6908254.html