zoukankan      html  css  js  c++  java
  • 向MapReduce转换:生成用户向量

    分为两部分:Mapper(解析输入行,生成用户-物品对)和 Reducer(把每个用户的物品聚合成用户向量):

    <span style="font-size:18px;">/***
     * @author YangXin
     * @date 2016/2/21
     * @info 主要功能是mahout实现解析Wikipedia链接文件的Mapper接口
     */
    package unitSix;
    import java.io.IOException;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.mahout.math.VarLongWritable;
    
    public class WikipediaToItemPrefsMapper extends Mapper<LongWritable, Text, VarLongWritable, VarLongWritable>{
    	private static final Pattern NUMBERS = Pattern.compile("(\d+)");
    	public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{
    		String line = value.toString();
    		Matcher m = NUMBERS.matcher(line);
    		//定位用户ID
    		m.find();                              
    		VarLongWritable userID = new VarLongWritable(Long.parseLong(m.group()));
    		VarLongWritable itemID = new VarLongWritable();
    		while(m.find()){
    			itemID.set(Long.parseLong(m.group()));
    			//为每一个物品ID生成用户-物品对
    			context.write(userID, itemID);
    		}
    	}
    }</span>



    <strong><span style="font-size:18px;">/***
     * @author YangXin
     * @info 功能是mahout实现从用户物品偏好中生成Vector的reducer接口
     */
    package unitSix;
    import java.io.IOException;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.mahout.math.RandomAccessSparseVector;
    import org.apache.mahout.math.VarLongWritable;
    import org.apache.mahout.math.Vector;
    import org.apache.mahout.math.VectorWritable;
    
    public class WikipediaToUserVectorReducer extends Reducer<VarLongWritable, VarLongWritable, VarLongWritable, VectorWritable>{
    	public void reduce(VarLongWritable userID, Iterable<VarLongWritable> itemPrefs, Context context) throws IOException, InterruptedException{
    		Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
    		for(VarLongWritable itemPref : itemPrefs){
    			userVector.set((int)itemPref.get(), 1.0f);
    		}
    		context.write(userID, new VectorWritable(userVector));
    	}
    }
    </span></strong>

  • 相关阅读:
    如何解决Pulling without specifying how to reconcile divergent branches
    Mac设置终端打开快捷键
    Mac 息屏快捷键
    Windows安装使用Openssl
    tomcat证书转换成nginx证书。jks/keystore > crt/key
    Windows下类似Linux的CAT命令是什么
    齐文词根词缀---3.23、co-(放在元音前面)表示共同,(和com和con一个意思)
    齐文词根词缀---3.22、clus-关闭(就是close)
    齐文词根词缀---3.21、clam/claim-喊
    齐文词根词缀---3.20、cis-切、割(同cid)
  • 原文地址:https://www.cnblogs.com/zsychanpin/p/7199868.html
Copyright © 2011-2022 走看看