zoukankan      html  css  js  c++  java
  • R语言 推荐算法 recommenderlab包

    recommend
    li_volleyball
    2016年3月20日

    library(recommenderlab)
    library(ggplot2)
    # 
    data(MovieLense)
    dim(MovieLense)
    ## [1]  943 1664
    MovieLense
    ## 943 x 1664 rating matrix of class 'realRatingMatrix' with 99392 ratings.
    image(sample(MovieLense,500),main="Raw ratings")
    
    # Histogram of the raw (un-normalized) ratings; the Z-score-normalized
    # ratings are plotted separately further down.
    qplot(getRatings(MovieLense),binwidth=1,main="histogram of raw ratings",xlab = "Ratings")
    
    summary(getRatings(MovieLense))
    ##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    ##    1.00    3.00    4.00    3.53    4.00    5.00
    #normalized ratings
    
    qplot(getRatings(normalize(MovieLense,method="Z-score")),main="hist of normalized ratings",xlab="rating")
    ## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
    
    summary(getRatings(normalize(MovieLense,method="Z-score")))
    ##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
    ## -4.8520 -0.6466  0.1084  0.0000  0.7506  4.1280
    # rowCounts() yields one value per row (user) of the 943 x 1664 matrix: how
    # many movies that user rated. The histogram's x axis is therefore a number
    # of movies rated and the bar heights count users.
    qplot(rowCounts(MovieLense), binwidth=10, main="Movies rated on Average", xlab="# of movies rated", ylab="# of users")
     
    qplot(colMeans(MovieLense), binwidth=0.1, main="Mean ratings of Movies", xlab="Rating", ylab="# of movies")
    
    recommenderRegistry$get_entries(dataType="realRatingMatrix")
    ## $IBCF_realRatingMatrix
    ## Recommender method: IBCF
    ## Description: Recommender based on item-based collaborative filtering (real data).
    ## Parameters:
    ##    k method normalize normalize_sim_matrix alpha na_as_zero minRating
    ## 1 30 Cosine    center                FALSE   0.5      FALSE        NA
    ## 
    ## $PCA_realRatingMatrix
    ## Recommender method: PCA
    ## Description: Recommender based on PCA approximation (real data).
    ## Parameters:
    ##   categories method normalize normalize_sim_matrix alpha na_as_zero
    ## 1         20 Cosine    center                FALSE   0.5      FALSE
    ##   minRating
    ## 1        NA
    ## 
    ## $POPULAR_realRatingMatrix
    ## Recommender method: POPULAR
    ## Description: Recommender based on item popularity (real data).
    ## Parameters: None
    ## 
    ## $RANDOM_realRatingMatrix
    ## Recommender method: RANDOM
    ## Description: Produce random recommendations (real ratings).
    ## Parameters: None
    ## 
    ## $SVD_realRatingMatrix
    ## Recommender method: SVD
    ## Description: Recommender based on EM-based SVD approximation from package bcv (real data).
    ## Parameters:
    ##   approxRank maxiter normalize minRating
    ## 1         NA     100    center        NA
    ## 
    ## $UBCF_realRatingMatrix
    ## Recommender method: UBCF
    ## Description: Recommender based on user-based collaborative filtering (real data).
    ## Parameters:
    ##   method nn sample normalize minRating
    ## 1 cosine 25  FALSE    center        NA
    
    # Evaluation setup: one "split" run (k=1) with 90% of the data used for
    # training, 10 items given per test user, and ratings >= 4 treated as "good".
    scheme <- evaluationScheme(MovieLense, method="split", train=0.9, k=1, given=10, goodRating=4)
    
    scheme
    ## Evaluation scheme with 10 items given
    ## Method: 'split' with 1 run(s).
    ## Training set proportion: 0.900
    ## Good ratings: >=4.000000
    ## Data set: 943 x 1664 rating matrix of class 'realRatingMatrix' with 99392 ratings.
    # Recommender configurations to compare. Each entry is keyed by a display
    # label and holds the recommenderlab method name plus its parameter list;
    # every method normalizes ratings with Z-scores.
    algorithms <- list(
      "random items" = list(
        name = "RANDOM",
        param = list(normalize = "Z-score")
      ),
      "popular items" = list(
        name = "POPULAR",
        param = list(normalize = "Z-score")
      ),
      "user-based CF" = list(
        name = "UBCF",
        param = list(
          normalize = "Z-score",
          method = "Cosine",
          nn = 50,
          minRating = 3
        )
      ),
      "item-based CF" = list(
        name = "IBCF",
        param = list(
          normalize = "Z-score",
          method = "Cosine"
        )
      )
    )
    
    # Run every configured algorithm on the evaluation scheme and score its
    # top-N recommendation lists for N = 1, 3, 5, 10, 15 and 20 movies.
    results <- evaluate(scheme, algorithms, n=c(1, 3, 5, 10, 15, 20))
    ## RANDOM run fold/sample [model time/prediction time]
    ##   1  [0.02sec/1.13sec] 
    ## POPULAR run fold/sample [model time/prediction time]
    ##   1  [0.14sec/0.2sec] 
    ## UBCF run fold/sample [model time/prediction time]
    ##   1  [0.11sec/52.33sec] 
    ## IBCF run fold/sample [model time/prediction time]
    ##   1  [348.01sec/0.66sec]
    # Plot all four evaluation results, annotating each curve (1:4) with its
    # list sizes. NOTE(review): with no plot type given this is presumably
    # recommenderlab's default ROC (TPR vs FPR) view - confirm against the docs.
    plot(results, annotate = 1:4, legend="topleft")
     
    # See precision / recall
    plot(results, "prec/rec", annotate=3)
    
    summary(results)
    ##               Length Class             Mode
    ## random items  1      evaluationResults S4  
    ## popular items 1      evaluationResults S4  
    ## user-based CF 1      evaluationResults S4  
    ## item-based CF 1      evaluationResults S4
    print(results)
    ## List of evaluation results for 4 recommenders:
    ## Evaluation results for 1 folds/samples using method 'RANDOM'.
    ## Evaluation results for 1 folds/samples using method 'POPULAR'.
    ## Evaluation results for 1 folds/samples using method 'UBCF'.
    ## Evaluation results for 1 folds/samples using method 'IBCF'.
    # plyr is needed only for ldply() below.
    library(plyr)
    # Stack the per-algorithm tables returned by avg(results) into a single data
    # frame; ldply() adds the `.id` column holding the algorithm label.
    result1<-ldply(avg(results))
    head(result1)
    ##            .id         TP        FP       FN       TN  precision
    ## 1 random items 0.00000000  1.000000 47.75789 1605.242 0.00000000
    ## 2 random items 0.05263158  2.947368 47.70526 1603.295 0.01754386
    ## 3 random items 0.09473684  4.905263 47.66316 1601.337 0.01894737
    ## 4 random items 0.23157895  9.768421 47.52632 1596.474 0.02315789
    ## 5 random items 0.32631579 14.673684 47.43158 1591.568 0.02175439
    ## 6 random items 0.48421053 19.515789 47.27368 1586.726 0.02421053
    ##        recall         TPR          FPR
    ## 1 0.000000000 0.000000000 0.0006231881
    ## 2 0.000420633 0.000420633 0.0018345901
    ## 3 0.001343461 0.001343461 0.0030535159
    ## 4 0.002965187 0.002965187 0.0060813035
    ## 5 0.004276282 0.004276282 0.0091353054
    ## 6 0.007966717 0.007966717 0.0121507535
    # Append the top-N list size to each algorithm label. The size vector is
    # recycled, which relies on every algorithm contributing exactly these six
    # rows in this order (the n values passed to evaluate()).
    result1[,1]<-paste(result1[,1],c(1, 3, 5, 10, 15, 20))
    # Pick the label, precision and recall columns by name rather than by
    # position, so the selection does not silently grab the wrong metric if the
    # column layout returned by avg() differs.
    temp_result1<-result1[,c(".id","precision","recall")]
    # F1 score: the harmonic mean of precision and recall, 2*p*r / (p + r).
    #
    # p: numeric vector of precision values.
    # r: numeric vector of recall values (same length as p, or recyclable).
    # Returns a numeric vector of F1 scores; 0 where p + r is 0.
    #
    # The earlier form `return(2*p*r)/(p+r)` left the function inside return(),
    # so the division never ran and the result was 2*p*r. The `f` columns printed
    # in this transcript (and the ranking derived from them) come from that
    # earlier form and will differ once the script is re-run.
    f<-function(p,r){
      denom <- p + r
      # With no true positives both p and r are 0; avoid 0/0 = NaN there.
      ifelse(denom == 0, 0, 2 * p * r / denom)
    }
    # Attach the score computed by f() from the precision (2nd) and recall (3rd)
    # columns of temp_result1 as a new column named `f`.
    result1_f <- cbind(
      result1,
      f = f(temp_result1[[2]], temp_result1[[3]])
    )
    head(result1_f)
    ##               .id         TP        FP       FN       TN  precision
    ## 1  random items 1 0.00000000  1.000000 47.75789 1605.242 0.00000000
    ## 2  random items 3 0.05263158  2.947368 47.70526 1603.295 0.01754386
    ## 3  random items 5 0.09473684  4.905263 47.66316 1601.337 0.01894737
    ## 4 random items 10 0.23157895  9.768421 47.52632 1596.474 0.02315789
    ## 5 random items 15 0.32631579 14.673684 47.43158 1591.568 0.02175439
    ## 6 random items 20 0.48421053 19.515789 47.27368 1586.726 0.02421053
    ##        recall         TPR          FPR            f
    ## 1 0.000000000 0.000000000 0.0006231881 0.000000e+00
    ## 2 0.000420633 0.000420633 0.0018345901 1.475905e-05
    ## 3 0.001343461 0.001343461 0.0030535159 5.091011e-05
    ## 4 0.002965187 0.002965187 0.0060813035 1.373350e-04
    ## 5 0.004276282 0.004276282 0.0091353054 1.860558e-04
    ## 6 0.007966717 0.007966717 0.0121507535 3.857568e-04
    # Show the six rows with the largest f value, highest first.
    head(result1_f[order(result1_f$f, decreasing = TRUE), ])
    ##                 .id       TP        FP       FN       TN precision
    ## 18 user-based CF 20 6.094737 12.273684 41.66316 1593.968 0.3381538
    ## 17 user-based CF 15 4.978947  8.915789 42.77895 1597.326 0.3629917
    ## 16 user-based CF 10 3.684211  5.684211 44.07368 1600.558 0.3948758
    ## 12 popular items 20 5.368421 14.631579 42.38947 1591.611 0.2684211
    ## 11 popular items 15 4.421053 10.578947 43.33684 1595.663 0.2947368
    ## 15  user-based CF 5 2.157895  2.610526 45.60000 1603.632 0.4532609
    ##        recall        TPR         FPR          f
    ## 18 0.16566075 0.16566075 0.007591384 0.11203762
    ## 17 0.13829264 0.13829264 0.005510458 0.10039817
    ## 16 0.10324963 0.10324963 0.003508480 0.08154156
    ## 12 0.12821289 0.12821289 0.009037106 0.06883008
    ## 11 0.11281484 0.11281484 0.006530564 0.06650138
    ## 15 0.06796729 0.06796729 0.001609646 0.06161383
    # Apply the model: fit a user-based CF ("UBCF") recommender on the full
    # MovieLense data, then predict a top-20 list for every user in that data.
    moive_re<-Recommender(MovieLense,method="UBCF")
    moives_pr<-predict(moive_re,MovieLense,n=20)
    # Convert the predictions to a plain list (one vector of movie titles per
    # user) and print it.
    print(as(moives_pr,"list"))
    ## [[1]]
    ##  [1] "Glory (1989)"                                                               
    ##  [2] "Schindler's List (1993)"                                                    
    ##  [3] "Casablanca (1942)"                                                          
    ##  [4] "Close Shave, A (1995)"                                                      
    ##  [5] "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)"
    ##  [6] "Leaving Las Vegas (1995)"                                                   
    ##  [7] "One Flew Over the Cuckoo's Nest (1975)"                                     
    ##  [8] "Rear Window (1954)"                                                         
    ##  [9] "Heathers (1989)"                                                            
    ## [10] "L.A. Confidential (1997)"                                                   
    ## [11] "City of Lost Children, The (1995)"                                          
    ## [12] "Butch Cassidy and the Sundance Kid (1969)"                                  
    ## [13] "Titanic (1997)"                                                             
    ## [14] "Lawrence of Arabia (1962)"                                                  
    ## [15] "Shine (1996)"                                                               
    ## [16] "Stand by Me (1986)"                                                         
    ## [17] "Gandhi (1982)"                                                              
    ## [18] "To Kill a Mockingbird (1962)"                                               
    ## [19] "In the Name of the Father (1993)"                                           
    ## [20] "Harold and Maude (1971)"                                                    
    ## 
    ## [[2]]
    ##  [1] "Boot, Das (1981)"                         
    ##  [2] "Dead Man Walking (1995)"                  
    ##  [3] "Lone Star (1996)"                         
    ##  [4] "Return of the Jedi (1983)"                
    ##  [5] "Celluloid Closet, The (1995)"             
    ##  [6] "Casablanca (1942)"                        
    ##  [7] "Citizen Kane (1941)"                      
    ##  [8] "Godfather: Part II, The (1974)"           
    ##  [9] "2001: A Space Odyssey (1968)"             
    ## [10] "When We Were Kings (1996)"                
    ## [11] "Diva (1981)"                              
    ## [12] "Close Shave, A (1995)"                    
    ## [13] "Tango Lesson, The (1997)"                 
    ## [14] "Beautiful Thing (1996)"                   
    ## [15] "Empire Strikes Back, The (1980)"          
    ## [16] "Mrs. Dalloway (1997)"                     
    ## [17] "Butch Cassidy and the Sundance Kid (1969)"
    ## [18] "My Fair Lady (1964)"                      
    ## [19] "Bonnie and Clyde (1967)"                  
    ## [20] "Annie Hall (1977)"                        
    ## 
    ## [[3]]
    ##  [1] "Mrs. Brown (Her Majesty, Mrs. Brown) (1997)" 
    ##  [2] "Star Wars (1977)"                            
    ##  [3] "Pulp Fiction (1994)"                         
    ##  [4] "English Patient, The (1996)"                 
    ##  [5] "Full Monty, The (1997)"                      
    ##  [6] "Lone Star (1996)"                            
    ##  [7] "Titanic (1997)"                              
    ##  [8] "Sweet Hereafter, The (1997)"                 
    ##  [9] "In the Company of Men (1997)"                
    ## [10] "Willy Wonka and the Chocolate Factory (1971)"
    ## [11] "In & Out (1997)"                             
    ## [12] "Vertigo (1958)"                              
    ## [13] "As Good As It Gets (1997)"                   
    ## [14] "Apt Pupil (1998)"                            
    ## [15] "Dazed and Confused (1993)"                   
    ## [16] "Ice Storm, The (1997)"                       
    ## [17] "This Is Spinal Tap (1984)"                   
    ## [18] "Trainspotting (1996)"                        
    ## [19] "Heat (1995)"                                 
    ## [20] "Fargo (1996)"                                
    ## 
    ## [[4]]
    ##  [1] "Titanic (1997)"               "English Patient, The (1996)" 
    ##  [3] "L.A. Confidential (1997)"     "Game, The (1997)"            
    ##  [5] "Good Will Hunting (1997)"     "Kiss the Girls (1997)"       
    ##  [7] "Full Monty, The (1997)"       "Usual Suspects, The (1995)"  
    ##  [9] "Rosewood (1997)"              "Boogie Nights (1997)"        
    ## [11] "Raise the Red Lantern (1991)" "Pulp Fiction (1994)"         
    ## [13] "Toy Story (1995)"             "Love Jones (1997)"           
    ## [15] "Eve's Bayou (1997)"           "Edge, The (1997)"            
    ## [17] "Sting, The (1973)"            "Some Like It Hot (1959)"     
    ## [19] "Strictly Ballroom (1992)"     "Soul Food (1997)"            
    ## 
    ## [[5]]
    ##  [1] "Terminator 2: Judgment Day (1991)"                                          
    ##  [2] "Terminator, The (1984)"                                                     
    ##  [3] "Usual Suspects, The (1995)"                                                 
    ##  [4] "Contact (1997)"                                                             
    ##  [5] "Braveheart (1995)"                                                          
    ##  [6] "Casablanca (1942)"                                                          
    ##  [7] "Twelve Monkeys (1995)"                                                      
    ##  [8] "Godfather, The (1972)"                                                      
    ##  [9] "Shawshank Redemption, The (1994)"                                           
    ## [10] "Raising Arizona (1987)"                                                     
    ## [11] "Amadeus (1984)"                                                             
    ## [12] "Nikita (La Femme Nikita) (1990)"                                            
    ## [13] "Reservoir Dogs (1992)"                                                      
    ## [14] "Citizen Kane (1941)"                                                        
    ## [15] "Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1963)"
    ## [16] "Schindler's List (1993)"                                                    
    ## [17] "Titanic (1997)"                                                             
    ## [18] "Leaving Las Vegas (1995)"                                                   
    ## [19] "North by Northwest (1959)"                                                  
    ## [20] "Army of Darkness (1993)" 
    #S4 class 是一种标准的R语言面向对象实现方式,s4对象有明确的类定义,参数定义,参数检查,继承关系,实例化等面向对象系统的特征。
    
    ``` 专注数据分析 欢迎转载并注明出处 ```
  • 相关阅读:
    [转]C#创建Windows服务与安装
    Uploadify导致Chrome频繁崩溃Crash
    【转】asp.net mvc css/js压缩合并 combres
    jQuery解决IE6、7、8不能使用 JSON.stringify 函数的问题
    性能测试初学_loadrunner使用中遇到的问题
    linux 安装apache http server
    性能测试初学_loadrunner脚本增强
    性能测试初学_对loadrunner脚本的理解
    性能测试初学_利用cookie 绕过登录
    性能测试初学_linux 计数器
  • 原文地址:https://www.cnblogs.com/li-volleyball/p/5385320.html
Copyright © 2011-2022 走看看