zoukankan      html  css  js  c++  java
  • ProbS CF matlab源代码(二分系统)(原创作品,转载注明出处,谢谢!)

    %ProbS
    % Driver script: loads a train/test rating split, binarizes both with
    % preprocess, scores every uncollected item of every user with the
    % resource-allocation weight matrix, then evaluates and saves the result.

    clear all;
    %% Load and preprocess the data

    % NOTE(review): the published paths had lost their backslashes
    % ('E: etwork_papersu1.base'); they are reconstructed here -- confirm
    % the actual location on your machine.
    data = load('E:\network_papers\u1.base');
    test = load('E:\network_papers\u1.test');

    % load() on a file whose extension is not .mat reads it as ASCII and
    % returns a plain numeric matrix, so it is handed to preprocess directly.
    R = preprocess(data);
    T = preprocess(test);


    [M,N] = size(R);
    [m,n] = size(T);

    % Row sums and column sums of the binary training matrix; both are
    % required by resource_allocate and were previously undefined.
    du = sum(R,2);
    di = sum(R,1);

    % resource_allocate is defined elsewhere; below, w is indexed as
    % w(target item, collected item).
    w = resource_allocate(R,du,di);

    % index_i_n(u).id holds the items user u has not collected.
    index_i_n = struct('id', cell(1,M));
    for u = 1:M
        index_i_n(u).id = find( R(u,:) == 0 );
    end
    %% Predict a score for every uncollected item of every user

    PR = zeros(M,N);
    for u = 1:M
        index_y = find( R(u,:) ~= 0 );
        vec = R(u,index_y);
        candidates = index_i_n(u).id;
        % One matrix product scores all candidates of user u at once.
        PR( u, candidates ) = ( w( candidates, index_y ) * vec' )';
    end

    value = evaluate('precision',R,PR,T,index_i_n);
    hit=hitrate(PR,T,20);
    save  predi_matrix PR;

    ------------------------------------------------------------------------------------------------

    %Preprocess

    % Build a binary user-item matrix from a rating list.
    %
    %   R = preprocess(A)
    %   R = preprocess(A, threshold)
    %
    %   A          matrix whose first three columns are used as
    %              (row index, column index, rating); any further columns
    %              are ignored
    %   threshold  optional; ratings >= threshold become 1, ratings below
    %              it become 0 (default 3)
    %
    %   R          max(A(:,1))-by-max(A(:,2)) matrix of 0/1 values; pairs
    %              that do not occur in A stay 0. An empty A yields an
    %              empty matrix.
    function R = preprocess (A, threshold)
    if nargin < 2
        threshold = 3;
    end
    if isempty(A)
        % Nothing to index; avoid the out-of-bounds error on A(:,1).
        R = zeros(0,0);
        return;
    end
    M = max( A(:,1) );
    N = max( A(:,2) );
    B = zeros(M,N);
    for i = 1:size(A,1)
        B( A(i,1), A(i,2) ) = A(i,3);
    end
    % Take the "liked" mask before any entry is overwritten so the two
    % assignments cannot interfere with each other.
    liked = B >= threshold;
    B( B < threshold ) = 0;
    B( liked ) = 1;
    R = B;

    -------------------------------------------------------------------------------------------------------------

    %evaluate

    % Evaluate a predicted score matrix against a held-out test matrix.
    %
    %   value = evaluate(opt,R,PR,T,index_i_n)
    %
    %   opt        metric name: 'RMSE', 'pcc', 'hitrate', 'rankscore',
    %              'precision', 'recall', 'personalization' or 'novelty'
    %   R          training matrix (M-by-N); its size is used for the
    %              enhancement factors and its column sums for 'novelty'
    %   PR         predicted scores, indexed as PR(user,item)
    %   T          test matrix (m-by-n); entries equal to 1 count as hits
    %   index_i_n  struct array; index_i_n(u).id lists the items user u
    %              has not collected in the training set
    %
    %   value      scalar metric averaged over the m rows of T, except for
    %              'hitrate', which returns a 1-by-n vector (hit rate per
    %              recommendation-list length) and also plots it.
    %
    % An unknown opt raises the error 'evaluate:unknownMetric'.
    function value = evaluate(opt,R,PR,T,index_i_n)
    [m,n] = size(T);
    [M,N] = size(R);
    %% Select the metric
    switch (opt)

        %% Root-mean-square error
        case {'RMSE'}
            RMSE = zeros(1,m);
            for u = 1:m
                index_tmp = index_i_n(u).id;
                % Drop items that fall outside the columns of T.
                index_tmp( index_tmp > n ) = [];
                len = length(index_tmp);
                % A user without candidate items keeps RMSE(u) = 0 instead
                % of producing 0/0 = NaN, which would poison the mean.
                if len > 0
                    vec = PR(u,index_tmp) - T(u,index_tmp);
                    RMSE(u) = sqrt( sum( vec .* vec ) / len );
                end
                if ~(mod(u,10))
                    fprintf('%d ',u);
                end
            end
            value = sum(RMSE) / length(RMSE);
            fprintf('The RMSE is: %f\n',value);



          %% Pearson correlation between predicted and true scores.
           % Lies in [-1,1]; values near +/-1 mean strong linear
           % correlation, 0 means none.
        case {'pcc'}
            pcc = zeros(1,m);
            for u = 1:m
                index_tmp = index_i_n(u).id;
                index_tmp( index_tmp > n ) = [];
                len = length(index_tmp);

                predict = PR(u,index_tmp);
                real = T(u,index_tmp);
                mean_predict = sum(predict) / len;
                mean_real = sum(real) / length(real);

                vec1 = predict - mean_predict;
                vec2 = real - mean_real;
                sum1 = vec1 * vec1';
                sum2 = vec2 * vec2';
                % Zero variance on either side leaves pcc(u) at 0.
                if ( sum1 ~= 0 ) && ( sum2 ~= 0 )
                    pcc(u) = vec1 * vec2' / sqrt( sum1 * sum2 );
                end
                if ~(mod(u,10))
                    fprintf('%d ',u);
                end
            end
            value = sum(pcc) / m;
            fprintf('The PCC is: %f\n',value);



            %% Hitting rate; only meaningful for binary like/dislike data
        case {'hitrate'}
            [~,index_sr] = sort(PR,2,'descend');
            rato = zeros(m,n);
            width = size(index_sr,2);
            for u = 1:m
                sumu = sum(T(u,:));
                if (sumu ~= 0) && (width > 0)
                    order = index_sr(u,:);
                    % hits(r) is the test value of the r-th ranked item;
                    % items outside the columns of T contribute nothing.
                    hits = zeros(1,width);
                    in_range = order <= n;
                    hits(in_range) = T(u,order(in_range));
                    cum_hits = cumsum(hits);
                    % List lengths beyond the ranking width reuse the
                    % count for the complete ranking.
                    upto = min(1:n, width);
                    rato(u,:) = cum_hits(upto) / sumu;
                end
                if ~(mod(u,10))
                    fprintf('%d ',u);
                end
            end
            % Explicit dimension so a single-user T still yields a vector.
            value = sum(rato,1) / m;

            x = 1:length(value);
            plot(x,value,'--r');
            hold on;
            xlabel('length of recommendation list');
            ylabel('hitting rate');

            %% Average ranking score
        case {'rankscore'}
            [~,index_sr] = sort(PR,2,'descend');
            % Preallocated so users without test items count as 0 and the
            % average is always taken over all m users.
            rato = zeros(1,m);
            for u = 1:m
                len1 = length( index_i_n(u).id );
                index_i_t = find( T(u,:) == 1 );
                len2 = length( index_i_t );
                index_tmp = zeros( 1, len2 );
                if len2 ~= 0
                    for k = 1:len2
                        tmp = index_i_t(k);
                        % Position of the test item in user u's ranking.
                        index_tmp(k) = find( index_sr(u,:) == tmp );
                    end
                    rato(u) = sum( index_tmp / len1 ) / len2;
                end
            end
            value = sum(rato) / m;
            fprintf('The average rank score is: %f\n',value);

           %% Precision and precision enhancement
        case {'precision'}
            L = 10;
            [~,index_sr] = sort(PR,2,'descend');
            list = index_sr(:,1:L);
            p = zeros(1,m);
            for u = 1:m
                index_i_t = find( T(u,:) == 1 );
                vec = intersect( index_i_t, list(u,:) );
                p(u) = numel(vec) / L;
            end
            value = sum(p) / m;
            ep = value * M * N / sum( sum(T) );
            fprintf('The precision is: %f\n',value);
            fprintf('The precision enhancement is: %f\n',ep);

            %% Recall and recall enhancement
        case {'recall'}
            L = 20;
            [~,index_sr] = sort(PR,2,'descend');
            list = index_sr(:,1:L);
            % Preallocated so users without test items count as 0 and the
            % average is always taken over all m users.
            recall = zeros(1,m);
            for u = 1:m
                index_i_t = find( T(u,:) == 1 );
                vec = ismember( index_i_t, list(u,:) );
                if sum( T(u,:) ) ~= 0
                    recall(u) = sum(vec) / sum( T(u,:) );
                end
            end
            value = sum(recall) / m;
            % A random list of L out of N items recalls L/N on average, so
            % the enhancement factor uses the item count N.
            er = value * N / L;
            fprintf('The recall is: %f\n',value);
            fprintf('The recall enhancement is: %f\n',er);
            %% Personalization: mean pairwise distance between top-L lists
        case {'personalization'}
            L = 20;
            [~,index_sr] = sort(PR,2,'descend');
            list = index_sr(:,1:L);
            h = zeros(m,m);
            for u = 1:m
                % Only the upper triangle is computed; h is symmetric.
                for k = u:m
                    tmp = intersect( list(u,:), list(k,:) );
                    h(u,k) = 1 - length( tmp ) / L;
                    h(k,u) = h(u,k);
                end
            end
            value = sum( sum(h) ) / ( m^2 - m );
            fprintf('The personalization is: %f\n',value);
            %% Novelty: mean self-information of the recommended items
        case {'novelty'}
            degree_i = sum( R,1 );
            L = 20;
            [~,index_sr] = sort(PR,2,'descend');
            list = index_sr(:,1:L);
            I = zeros(1,m);
            for u = 1:m
                vec1 = degree_i( 1, list(u,:) );
                % Sum of logs instead of the log of a running product.
                I(u) = sum( log2( M ./ vec1 ) ) / L;
            end
            value = sum(I) / m;
            fprintf('The novelty is: %f\n',value);

        otherwise
            error('evaluate:unknownMetric', ...
                  'Unknown evaluation metric: %s', opt);

    end
        
    -------------------------------------------------------------------------------------------------           

    %CF
    % Driver script: loads a train/test rating split, binarizes both with
    % preprocess, predicts a score for every uncollected item of every user
    % from the user-user similarity matrix, then evaluates the result.

    %% Load and preprocess the data

    clear all;
    % NOTE(review): the published paths had lost their backslashes
    % ('E: etwork_papersu1.base'); they are reconstructed here -- confirm
    % the actual location on your machine.
    %data = load('E:\network_papers\datasets\Jester\jeste_train');
    %test = load('E:\network_papers\datasets\Jester\jester_test');
    data = load('E:\network_papers\u1.base');
    test = load('E:\network_papers\u1.test');

    R = preprocess(data);
    T = preprocess(test);
    %{
    R=data.train;
    R(R<3)=0;
    R(R>=3)=1;
    T=test.test;
    T(T<3)=0;
    T(T>=3)=1;
    du = sum(R,2);
    di = sum(R,1);
    ex=find(du==0);
    R(ex,:)=[];
    T(ex,:)=[];
    du(ex,:)=[];
    %}

    [M,N] = size(R);
    [m,n] = size(T);
    % index_i_n(u).id holds the items user u has not collected.
    index_i_n = struct('id', cell(1,M));
    for u = 1:M
        index_i_n(u).id = find( R(u,:) == 0 );
    end
    %% Compute the similarity between every pair of users

    % get_Sim_u is defined elsewhere.
    sim = get_Sim_u(R);
    %% Predict scores

    PR = zeros(M,N);
    for u = 1:M
        % Reuse the uncollected-item list computed above.
        index_n = index_i_n(u).id;
        for k = 1:length( index_n )
            PR( u, index_n(k) ) = predict_Rate( u, index_n(k), sim, R );
        end
    end
     value = evaluate('precision',R,PR,T,index_i_n);
     hit=hitrate(PR,T,20);


     

    请尊重原创知识,本人非常愿意与大家分享 转载请注明出处:http://www.cnblogs.com/90zeng/ 作者:博客园-90Zeng
  • 相关阅读:
    .Net之美读书笔记15
    WinForm跨线程访问控件异常
    .Net之美读书笔记14
    数据库监视器(SQL Server Profilter)
    .Net之美读书笔记13
    .Net之美读书笔记11
    .Net之美读书笔记9
    .Net之美读书笔记8
    tensorflow:验证码的识别(中)
    tensorflow:验证码的识别(上)
  • 原文地址:https://www.cnblogs.com/90zeng/p/4121645.html
Copyright © 2011-2022 走看看