zoukankan      html  css  js  c++  java
  • 推荐系统-03-简单基于用户的推荐

    下面是一个基本的 Java 程序：RecommenderIntro.java

    package xyz.pl8.recommenderintro;
    
    import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
    import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
    import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
    import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
    import org.apache.mahout.cf.taste.model.DataModel;
    import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
    import org.apache.mahout.cf.taste.recommender.RecommendedItem;
    import org.apache.mahout.cf.taste.recommender.Recommender;
    import org.apache.mahout.cf.taste.similarity.UserSimilarity;
    import java.io.File;
    import java.util.List;
    
    /**
     * Minimal user-based recommender example.
     *
     * <p>Loads a ratings file, builds a Pearson-correlation user similarity and a
     * nearest-N user neighborhood on top of it, and prints the recommended items
     * for a single user.
     *
     * <p>Usage: {@code RecommenderIntro [ratingsFile]}. When no argument is given,
     * {@value #DEFAULT_DATA_FILE} is used.
     */
    public class RecommenderIntro {
        /** Ratings file used when no path is passed on the command line. */
        private static final String DEFAULT_DATA_FILE = "/home/hadoop/intro.csv";
        /** Number of nearest neighbors considered for each user. */
        private static final int NEIGHBORHOOD_SIZE = 3;
        /** Id of the user to produce recommendations for. */
        private static final long TARGET_USER_ID = 2;
        /** Maximum number of items to recommend. */
        private static final int HOW_MANY = 2;

        /**
         * Runs the example.
         *
         * @param args optional; {@code args[0]} is the path of the ratings file
         */
        public static void main(String[] args){
            String dataFile = args.length > 0 ? args[0] : DEFAULT_DATA_FILE;
            try{
                // Expected file format, one preference per line: userId,itemId,rating
                DataModel model = new FileDataModel(new File(dataFile));
                System.out.println(model);
                // User-to-user similarity
                UserSimilarity similarity = new PearsonCorrelationSimilarity(model);
                // K nearest neighbor users
                UserNeighborhood neighborhood =
                        new NearestNUserNeighborhood(NEIGHBORHOOD_SIZE, similarity, model);
                // User-based recommender
                Recommender recommender =
                        new GenericUserBasedRecommender(model, neighborhood, similarity);
                // Recommend items for the target user
                List<RecommendedItem> recommendedItems =
                        recommender.recommend(TARGET_USER_ID, HOW_MANY);
                for (RecommendedItem item : recommendedItems){
                    System.out.println(item);
                }

            }catch (Exception e){
                // Top-level boundary of a demo program: report the failure and return.
                e.printStackTrace();
            }
        }
    }
    

    下面是基于物品的多线程批量相似度计算（为基于物品的推荐预先计算物品相似度）

    package xyz.pl8.userrecommendermovielens;
    
    import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
    import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
    import org.apache.mahout.cf.taste.impl.similarity.precompute.FileSimilarItemsWriter;
    import org.apache.mahout.cf.taste.impl.similarity.precompute.MultithreadedBatchItemSimilarities;
    import org.apache.mahout.cf.taste.model.DataModel;
    import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
    import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
    import org.apache.mahout.cf.taste.similarity.precompute.BatchItemSimilarities;
    import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter;
    
    import java.io.File;
    import java.io.IOException;
    
    /**
     * Precomputes item-item similarities for a MovieLens ratings file using multiple
     * threads and writes them to {@code similarity.csv} in the JVM temp directory.
     *
     * <p>Usage: {@code BatchItemSimilaritiesIntro <ratingsFile>}.
     */
    public class BatchItemSimilaritiesIntro {
        /** Number of similar items requested per item from the batch computation. */
        private static final int SIMILAR_ITEMS_PER_ITEM = 5;
        /** Upper bound, in hours, handed to the batch computation. */
        private static final int MAX_DURATION_IN_HOURS = 1;

        /**
         * Runs the batch similarity computation.
         *
         * @param args exactly one element: the path of the MovieLens ratings file
         * @throws IOException if the ratings file cannot be converted or the result
         *         file cannot be written
         */
        public static void main(String[] args) throws IOException {
            if (args.length != 1) {
                System.err.println("Need dataset file as argument!");
                System.exit(-1);
            }

            File resultFile = new File(System.getProperty("java.io.tmpdir"), "similarity.csv");
            DataModel dataModel = new MovieLensDataModel(new File(args[0]));
            ItemSimilarity similarity = new LogLikelihoodSimilarity(dataModel);
            ItemBasedRecommender recommender = new GenericItemBasedRecommender(dataModel, similarity);
            BatchItemSimilarities batchItemSimilarities =
                    new MultithreadedBatchItemSimilarities(recommender, SIMILAR_ITEMS_PER_ITEM);
            SimilarItemsWriter writer = new FileSimilarItemsWriter(resultFile);

            // Use one worker per available processor.
            int degreeOfParallelism = Runtime.getRuntime().availableProcessors();
            int numSimilarities = batchItemSimilarities.computeItemSimilarities(
                    degreeOfParallelism, MAX_DURATION_IN_HOURS, writer);

            // The original message read "Computed N for  items ..." with the nouns missing.
            System.out.println("Computed " + numSimilarities
                    + " similarities and saved them to " + resultFile.getAbsolutePath());
        }
    }
    
    
    
    package xyz.pl8.userrecommendermovielens;
    
    import org.apache.commons.io.Charsets;
    import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
    import org.apache.mahout.common.iterator.FileLineIterable;
    import org.omg.CORBA.PUBLIC_MEMBER;
    
    import java.io.*;
    import java.nio.charset.Charset;
    import java.util.regex.Pattern;
    
    public class MovieLensDataModel extends FileDataModel {
        private static String COLON_DELIMITER = "::";
        private  static Pattern COLON_DELIMITTER_PATTERN = Pattern.compile(COLON_DELIMITER);
    
        public MovieLensDataModel(File ratingsFile) throws IOException{
    
            super(convertFile(ratingsFile));
    
        }
    
        public static File convertFile(File originalFile) throws IOException{
            File resultFile = new File(System.getProperty("java.io.tmpdir"), "ratings.csv");
            if (resultFile.exists()){
                resultFile.delete();
            }
    
            try {
    
                Writer writer = new OutputStreamWriter(new FileOutputStream(resultFile), Charsets.UTF_8);
                for (String line : new FileLineIterable(originalFile, false)) {
                    int lastIndex = line.lastIndexOf(COLON_DELIMITER);
                    if (lastIndex < 0) {
                        throw new IOException("Invalid data!");
                    }
                    String subLine = line.substring(0, lastIndex);
                    String convertedSubLne = COLON_DELIMITTER_PATTERN.matcher(subLine).replaceAll(",");
                    lastIndex = convertedSubLne.lastIndexOf(",");
                    if (lastIndex <= 0) {
                        continue;
                    }
                    writer.write(convertedSubLne);
                    writer.write('
    ');
                }
                writer.close();
            }catch (Exception e){
                resultFile.delete();
    
            }
            return  resultFile;
        }
    }
    
    
  • 相关阅读:
    BZOJ4240 有趣的家庭菜园(贪心+树状数组)
    BZOJ4241 历史研究(莫队)
    BZOJ4237 稻草人(分治+树状数组+单调栈)
    BZOJ4236 JOIOJI
    洛谷 P3765 总统选举 解题报告
    洛谷 P1903 [国家集训队]数颜色 解题报告
    洛谷 P4514 上帝造题的七分钟 解题报告
    洛谷 P3302 [SDOI2013]森林 解题报告
    洛谷 P3437 [POI2006]TET-Tetris 3D 解题报告
    洛谷 P4008 [NOI2003]文本编辑器 解题报告
  • 原文地址:https://www.cnblogs.com/freebird92/p/9047488.html
Copyright © 2011-2022 走看看