zoukankan      html  css  js  c++  java
  • Mahout使用(一)

    1.HelloMahout.java
    2.DistanceTest.java
    3.MahoutDemo.java

    1.HelloMahout.java

     1 package cn.crxy.mahout;
     2 
     3 import java.io.File;
     4 import java.util.List;
     5 
     6 import org.apache.log4j.Logger;
     7 import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
     8 import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
     9 import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
    10 import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
    11 import org.apache.mahout.cf.taste.model.DataModel;
    12 import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
    13 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
    14 import org.apache.mahout.cf.taste.recommender.Recommender;
    15 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
    16 
    17 public class HelloMahout {
    18 
    19     public static void main(String[] args) {
    20         
    21         Logger logger=Logger.getLogger(HelloMahout.class);
    22         try {
    23             //读取用户评分数据    封装成一个model
    24             DataModel model = new FileDataModel(new File("F:\360Downloads\超人学院\第14期视频\2016-09-12【mahout】\样本数据\info.csv"));
    25             // 根据相似度找出对应的好朋友的标准     物以类聚,人以群分
    26             UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(model);
    27             // 邻域 选择两个好朋友帮我推荐
    28             UserNeighborhood userNeighborhood = new NearestNUserNeighborhood(2,userSimilarity, model);
    29             // 构建推荐引擎
    30             Recommender recommender = new GenericUserBasedRecommender(model,userNeighborhood, userSimilarity);
    31             // 进行推荐
    32             List<RecommendedItem> recommend = recommender.recommend(1, 5);
    33             for (RecommendedItem item : recommend) {
    34                 logger.info(item);
    35             }
    36         } catch (Exception e) {
    37             logger.error(e.getMessage());
    38         }
    39     }
    40 }
    View Code

    2.DistanceTest.java

     1 package cn.crxy.mahout;
     2 
     3 import org.junit.Before;
     4 import org.junit.Test;
     5 
     6 public class DistanceTest {
     7     
     8     //    水果维度依次为:苹果、梨、桃子、栗子、香蕉、橘子
     9     //    小明:5,4,2,1,5,5
    10     //    小丽:5,3,1,2,1,1
    11     //    小王:5,3,4,1,4,3
    12     private int[] a;
    13     private int[] b;
    14     private int[] c;
    15     
    16     @Before
    17     public void initData(){
    18         a=new int[]{5,4,2,1,5,5};
    19         b=new int[]{5,3,1,2,1,1};
    20         c=new int[]{5,3,4,1,4,3};
    21     }
    22     
    23     @Test
    24     public void Distance(){
    25 //        a-b:5.916079783099616
    26 //        a-c:3.1622776601683795
    27 //        c-b:4.795831523312719
    28 
    29         System.out.println(String.format("a-b:%s", 1.0/(1.0+Man(a, b))));
    30         System.out.println(String.format("a-c:%s", 1.0/(1.0+Man(a, c))));
    31         System.out.println(String.format("c-b:%s", 1.0/(1.0+Man(c, b))));
    32 //        a-b:0.08333333333333333
    33 //        a-c:0.14285714285714285
    34 //        c-b:0.1
    35         
    36     }
    37     //欧式距离
    38     private double ErluD(int[] a_array,int[] b_array){
    39         double result=0;
    40         for (int i = 0; i < a_array.length; i++) {
    41             result+=Math.pow(a_array[i]-b_array[i],2);
    42         }
    43         return Math.sqrt(result);
    44     }
    45     //曼哈顿距离
    46     private double Man(int[] a_array,int[] b_array){
    47         double result=0;
    48         for (int i = 0; i < a_array.length; i++) {
    49             result+=Math.abs(a_array[i]-b_array[i]);
    50         }
    51         return result;
    52     }
    53     //min式距离
    54     private double Min(int[] a_array,int[] b_array,int p){
    55         double result=0;
    56         for (int i = 0; i < a_array.length; i++) {
    57             result+=Math.pow(Math.abs(a_array[i]-b_array[i]),p);
    58         }
    59         return Math.pow(result,1.0/p);
    60     }
    61 
    62 }
    View Code

    3.MahoutDemo.java

      1 package cn.crxy.mahout;
      2 
      3 import java.io.File;
      4 import java.util.List;
      5 
      6 import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
      7 import org.apache.mahout.cf.taste.impl.common.FastIDSet;
      8 import org.apache.mahout.cf.taste.impl.model.GenericPreference;
      9 import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
     10 import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
     11 import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
     12 import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
     13 import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
     14 import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
     15 import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
     16 import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
     17 import org.apache.mahout.cf.taste.impl.similarity.CachingUserSimilarity;
     18 import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
     19 import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
     20 import org.apache.mahout.cf.taste.model.DataModel;
     21 import org.apache.mahout.cf.taste.model.PreferenceArray;
     22 import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
     23 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
     24 import org.apache.mahout.cf.taste.recommender.Recommender;
     25 import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
     26 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
     27 import org.junit.Before;
     28 import org.junit.Ignore;
     29 import org.junit.Test;
     30 
     31 public class MahoutDemo {
     32 
     33     //组装datamodel
     34     
     35     // userid itemid score
     36     // 101 102 103 104
     37     // 1(5,4,2,)
     38     // 2(,2,4,1)
     39     // 3(4,3,1,)
     40     DataModel dataModel;
     41     
     42     @Before
     43     public void initData() throws Exception{
     44         //每一个用户的喜好列表 key:用户id  value:该用户的偏好列表
     45         FastByIDMap<PreferenceArray> data=new FastByIDMap<PreferenceArray>();
     46         //组装第一个用户 偏好列表
     47         PreferenceArray array1=new GenericUserPreferenceArray(3);
     48         //PreferenceArray index 指:偏好列表的index 序号。
     49         array1.setUserID(0, 1);
     50         array1.setItemID(0, 101);
     51         array1.setValue(0, 5);
     52         
     53         array1.setUserID(1, 1);
     54         array1.setItemID(1, 102);
     55         array1.setValue(1, 4);
     56         
     57         array1.setUserID(2, 1);
     58         array1.setItemID(2, 103);
     59         array1.setValue(2, 2);
     60         
     61         data.put(1, array1);
     62         
     63         //组装第二个喜好
     64         PreferenceArray array2=new GenericUserPreferenceArray(3);
     65         //2(,2,4,1)
     66         array2.set(0, new GenericPreference(2,102,2));
     67         array2.set(1, new GenericPreference(2,103,4));
     68         array2.set(2, new GenericPreference(2,104,1));
     69         data.put(2, array2);
     70         //组装第三个喜好
     71         PreferenceArray array3=new GenericUserPreferenceArray(3);
     72         //3(4,3,1,)
     73         array3.set(0, new GenericPreference(3,101,4));
     74         array3.set(1, new GenericPreference(3,102,3));
     75         array3.set(2, new GenericPreference(3,103,1));
     76         data.put(3, array3);
     77         
     78         //dataModel=new GenericDataModel(data);
     79 //        dataModel=new GenericBooleanPrefDataModel(userData);
     80 //        System.out.println(dataModel.getPreferenceValue(1, 102));//获得1用户对102的评分
     81 //        System.out.println(dataModel.getItemIDsFromUser(1));
     82 //        System.out.println(dataModel.getUserIDs());
     83         
     84         
     85         //1  101 102 103
     86         //2 102 103 
     87         // key为userid value:物品的集合 set
     88         FastByIDMap<FastIDSet> userData=new FastByIDMap<FastIDSet>();
     89         
     90         FastIDSet userSet1=new FastIDSet(3);
     91         userSet1.add(101);
     92         userSet1.add(102);
     93         userSet1.add(103);
     94         userData.put(1,userSet1);
     95         
     96         FastIDSet userSet2=new FastIDSet(2);
     97         userSet2.add(102);
     98         userSet2.add(103);
     99         userData.put(2,userSet2);
    100         
    101         
    102         //无偏好的构建
    103 //        dataModel=new GenericBooleanPrefDataModel(userData);
    104         
    105         
    106         //读取文件 有偏好的
    107         dataModel=new FileDataModel(new File("F:\360Downloads\超人学院\第14期视频\2016-09-12【mahout】\样本数据\info.csv"));
    108         //读取文件 无偏好的 无偏好的数据只有用户和其关联的商品 没有对应商品的评分
    109 //        dataModel=new FileDataModel(new File("F:\360Downloads\超人学院\第14期视频\2016-09-12【mahout】\样本数据\ubool.data"));
    110         
    111         
    112 //        对于无偏好数据:getvalue:如果存在记录则是1.0;否则为null。  
    113 //        System.out.println(dataModel.getPreferenceValue(1, 103));
    114 //        System.out.println(dataModel.getItemIDsFromUser(1));
    115 //        System.out.println(dataModel.getUserIDs());
    116         
    117     }
    118     @Ignore
    119     public void testUserSimi() throws Exception{
    120         
    121         //利用model和相似度函数 计算用户相似度
    122 //        UserSimilarity userSimilarity=new TanimotoCoefficientSimilarity(dataModel);
    123         UserSimilarity userSimilarity=new PearsonCorrelationSimilarity(dataModel);
    124         userSimilarity=new CachingUserSimilarity(userSimilarity, dataModel);
    125         //查询用户之间的相似度  0.9999999999999998    0.944911182523068
    126         //如果使用CachingUserSimilarity userSimilarity(1,5) 第二次不会再次计算了
    127         System.out.println(userSimilarity.userSimilarity(1, 5));
    128         System.out.println(userSimilarity.userSimilarity(1, 5));
    129     }
    130     @Ignore
    131     public void testItemSimi() throws Exception{
    132         
    133         //利用model和相似度函数 计算物品相似度
    134         ItemSimilarity itemSimilarity=new PearsonCorrelationSimilarity(dataModel);
    135         itemSimilarity =new CachingItemSimilarity(itemSimilarity,dataModel);
    136         //查询物品之间的相似度 0.9449111825230729
    137         System.out.println(itemSimilarity.itemSimilarity(101, 102));
    138     }
    139     @Test
    140     public void testuserNeighborhood() throws Exception{
    141         //相似度   有相似度才能算邻居是谁
    142         UserSimilarity userSimilarity=new PearsonCorrelationSimilarity(dataModel);
    143         //1.固定数目的邻居  如果取邻居 只取前三个 
    144         UserNeighborhood userNeighborhood=new NearestNUserNeighborhood(3,userSimilarity,dataModel);
    145         long[] userNeighborhoods = userNeighborhood.getUserNeighborhood(1);//为1用户取得用户
    146         for (long l : userNeighborhoods) {
    147             System.out.println(l+"NearestNUserNeighborhoodsimi---"+userSimilarity.userSimilarity(1, l));
    148         }
    149 //        4NearestNUserNeighborhoodsimi---0.9999999999999998
    150 //        5NearestNUserNeighborhoodsimi---0.944911182523068
    151 //        2NearestNUserNeighborhoodsimi---   -0.7642652566278799这个是负0.7
    152 
    153     
    154         //2.固定阈值的邻居  只要0.8以上的
    155         userNeighborhood=new ThresholdUserNeighborhood(0.7,userSimilarity,dataModel);
    156         long[] userNeighborhoodsnew = userNeighborhood.getUserNeighborhood(1);
    157         System.out.println(userSimilarity.userSimilarity(1, 2)); //查看1和2的相似度
    158         for (long l : userNeighborhoodsnew) {
    159             System.out.println(l+"ThresholdUserNeighborhoodsimi---"+userSimilarity.userSimilarity(1, l));
    160         }
    161         
    162     }
    163     @Test
    164     public void testItemCmd() throws Exception{
    165         //1.基于物品的有偏好的推荐   基于物品的不需要邻居
    166 //        ItemSimilarity itemSimilarity=new PearsonCorrelationSimilarity(dataModel);
    167 //        Recommender recommender=new GenericItemBasedRecommender(dataModel,itemSimilarity);
    168         
    169         //2.基于物品的无偏好推荐
    170         ItemSimilarity itemSimilarity=new TanimotoCoefficientSimilarity(dataModel);
    171         Recommender recommender=new GenericBooleanPrefItemBasedRecommender(dataModel,itemSimilarity);
    172         
    173         
    174         List<RecommendedItem> recommend = recommender.recommend(1, 3);//给用户1推荐3个.
    175         for (RecommendedItem recommendedItem : recommend) {
    176             System.out.println(recommendedItem);
    177             //1.基于物品的有偏好的推荐RecommendedItem[item:104, value:5.0]其他的推荐不出来了....所以只推荐出了1个
    178             
    179             //2.基于物品的无偏好的推荐
    180             //RecommendedItem[item:104, value:1.8]
    181             //RecommendedItem[item:106, value:1.15]
    182             //RecommendedItem[item:105, value:0.85]
    183         }
    184     }
    185     @Test
    186     public void testUserCmd() throws Exception{
    187         //1.基于用户的有偏好的推荐
    188         //UserSimilarity userSimilarity=new PearsonCorrelationSimilarity(dataModel);
    189         //2.基于用户的无偏好的推荐
    190         UserSimilarity userSimilarity=new TanimotoCoefficientSimilarity(dataModel);
    191         
    192         UserNeighborhood userNeighborhood=new NearestNUserNeighborhood(3,userSimilarity,dataModel);//Top 3
    193         //构建推荐对象
    194         Recommender recommender=new GenericUserBasedRecommender(dataModel,userNeighborhood,userSimilarity);
    195         List<RecommendedItem> recommend = recommender.recommend(1, 3);
    196         for (RecommendedItem recommendedItem : recommend) {
    197             System.out.println(recommendedItem);
    198             //1.基于用户的有偏好推荐
    199             //RecommendedItem[item:104, value:5.0]
    200             //RecommendedItem[item:106, value:4.0]
    201             //2.基于用户的无偏好推荐
    202             //RecommendedItem[item:106, value:4.0]
    203             //RecommendedItem[item:104, value:3.2121212]
    204 
    205         }
    206     }
    207     
    208     
    209 }
    View Code
  • 相关阅读:
    linux查看硬件信息的方法
    linux最常用命令
    研究php单例模式实现数据库类
    HTML5语义元素
    第一次博客作业
    2020系统综合实践 第7次实践作业 06组
    2020系统综合实践 第6次实践作业 06组
    2020系统综合实践 第5次实践作业
    2020系统综合实践 第4次实践作业
    2020系统综合实践 第3次实践作业
  • 原文地址:https://www.cnblogs.com/DreamDrive/p/5931126.html
Copyright © 2011-2022 走看看