zoukankan      html  css  js  c++  java
  • Spark MLlib Collaborative Filtering 学习

     1 package ML.collaborativeFilltering;
     2 
     3 import org.apache.spark.SparkConf;
     4 import org.apache.spark.api.java.JavaDoubleRDD;
     5 import org.apache.spark.api.java.JavaPairRDD;
     6 import org.apache.spark.api.java.JavaRDD;
     7 import org.apache.spark.api.java.JavaSparkContext;
     8 import org.apache.spark.api.java.function.Function;
     9 import org.apache.spark.mllib.recommendation.ALS;
    10 import org.apache.spark.mllib.recommendation.MatrixFactorizationModel;
    11 import org.apache.spark.mllib.recommendation.Rating;
    12 import scala.Tuple2;
    13 
    14 /**
    15  * TODO
    16  *
    17  * @ClassName: example
    18  * @author: DingH
    19  * @since: 2019/4/10 16:03
    20  */
    21 public class example {
    22     public static void main(String[] args) {
    23         SparkConf conf = new SparkConf().setAppName("Java Collaborative Filtering Example");
    24         JavaSparkContext jsc = new JavaSparkContext(conf);
    25 
    26         // Load and parse the data
    27         String path = "D:\IdeaProjects\SimpleApp\src\main\resources\data\mllib\als\test.data";
    28         JavaRDD<String> data = jsc.textFile(path);
    29         JavaRDD<Rating> ratings = data.map(new Function<String, Rating>() {
    30             public Rating call(String s) {
    31                 String[] sarray = s.split(",");
    32                 return new Rating(Integer.parseInt(sarray[0]), Integer.parseInt(sarray[1]), Double.parseDouble(sarray[2]));
    33             }
    34           }
    35         );
    36         int ranks = 10;
    37         int numIterations = 10;
    38         MatrixFactorizationModel model = ALS.train(ratings.rdd(), ranks, numIterations);
    39 
    40         JavaRDD<Tuple2<Object, Object>> userProducts = ratings.map(new Function<Rating, Tuple2<Object, Object>>() {
    41             public Tuple2<Object, Object> call(Rating r) {
    42               return new Tuple2<Object, Object>(r.user(), r.product());
    43             }
    44           }
    45         );
    46         JavaPairRDD<Tuple2<Integer, Integer>, Double> predictions = JavaPairRDD.fromJavaRDD(model.predict(JavaRDD.toRDD(userProducts)).toJavaRDD().map(
    47             new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
    48               public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
    49                 return new Tuple2<Tuple2<Integer, Integer>, Double>(
    50                   new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
    51               }
    52             }
    53           ));
    54 
    55         JavaRDD<Tuple2<Double, Double>> ratesAndPreds = JavaPairRDD.fromJavaRDD(ratings.map(
    56             new Function<Rating, Tuple2<Tuple2<Integer, Integer>, Double>>() {
    57               public Tuple2<Tuple2<Integer, Integer>, Double> call(Rating r){
    58                 return new Tuple2<Tuple2<Integer, Integer>, Double>(
    59                   new Tuple2<Integer, Integer>(r.user(), r.product()), r.rating());
    60               }
    61             }
    62           )).join(predictions).values();
    63 
    64         double MSE = JavaDoubleRDD.fromRDD(ratesAndPreds.map(
    65           new Function<Tuple2<Double, Double>, Object>() {
    66             public Object call(Tuple2<Double, Double> pair) {
    67               Double err = pair._1() - pair._2();
    68               return err * err;
    69             }
    70           }
    71         ).rdd()).mean();
    72 
    73         System.out.println("Mean Squared Error = " + MSE);
    74 
    75 
    76 
    77 
    78     }
    79 }
  • 相关阅读:
    网易云易盾牵手百视通 助力广电领域新媒体内容安全
    理解DDoS防护本质:基于资源较量和规则过滤的智能化系统
    DDoS防护之TCP防护
    2017年内容安全十大事件盘点
    知物由学 | AI时代,那些黑客正在如何打磨他们的“利器”?(一)
    应对羊毛党的老手段不管用了,但有些公司依然有办法,他们是怎么做的?
    知物由学 | 未来安全隐患:AI的软肋——故意欺骗神经网络
    MYSQL数据库的数据完整性
    MYSQL是什么?
    python多线程实现多任务
  • 原文地址:https://www.cnblogs.com/dhName/p/10684234.html
Copyright © 2011-2022 走看看