zoukankan      html  css  js  c++  java
  • ProbS CF matlab源代码(二分系统)(原创作品,转载注明出处,谢谢!)

    %ProbS

    clear all;
    %% Load and preprocess the data
    % NOTE(review): both paths look mangled (the backslash sequences of a
    % Windows path seem to have been lost) -- confirm the real location of
    % u1.base / u1.test before running.
    data = load('E: etwork_papersu1.base');
    test = load('E: etwork_papersu1.test');

    % load() on a file whose extension is not .mat reads it as ASCII and
    % returns a plain numeric matrix, so the triples are passed directly
    % (dot-indexing such as data.train would fail on a matrix).
    R = preprocess(data);
    T = preprocess(test);


    [M,N] = size(R);
    [m,n] = size(T);

    % Row and column sums of the binary training matrix.  They were used
    % below without ever being defined.
    % NOTE(review): resource_allocate is not visible here; this assumes it
    % expects du as a column of per-user sums and di as a row of per-item sums.
    du = sum(R,2);
    di = sum(R,1);
    w = resource_allocate(R,du,di);

    % Remember, for every user, which items are still uncollected.
    for u = 1:M
        index_i_n(u).id = find( R(u,:) == 0 );
    end
    %% For every user u, predict a score for all of u's uncollected items

    PR = zeros(M,N);
    for u = 1:M
        index_y = find( R(u,:) ~= 0 );     % items u has already collected
        targets = index_i_n(u).id;         % items to be scored
        % One matrix-vector product scores every target item at once; the
        % result is a column, transposed to fit the row slice of PR.
        PR(u,targets) = ( w(targets,index_y) * R(u,index_y)' )';
    end

    value = evaluate('precision',R,PR,T,index_i_n);
    hit=hitrate(PR,T,20);
    save  predi_matrix PR;

    ------------------------------------------------------------------------------------------------

    %Preprocess

    function R = preprocess (A)
    % Convert a list of rating triples into a binary matrix.
    %   Every row of A is read as (row index, column index, rating).  The
    %   result has max(A(:,1)) rows and max(A(:,2)) columns; a cell is 1 when
    %   its rating is at least 3 and 0 when it is below 3 (cells that never
    %   receive a rating stay 0).  When the same cell occurs more than once,
    %   the last occurrence wins.
    rowCount = max( A(:,1) );
    colCount = max( A(:,2) );
    ratings(rowCount,colCount) = 0;     % touching the last cell allocates the matrix
    for triple = A.'                    % each column of A.' is one (row, col, rating) triple
        ratings( triple(1), triple(2) ) = triple(3);
    end
    isLow  = ratings < 3;
    isHigh = ratings >= 3;
    ratings( isLow )  = 0;
    ratings( isHigh ) = 1;
    R = ratings;

    -------------------------------------------------------------------------------------------------------------

    %evaluate

    % EVALUATE  Score a matrix of predicted ratings against a test matrix.
    %
    %   value = evaluate(opt,R,PR,T,index_i_n)
    %
    %   opt        name of the metric: 'RMSE', 'pcc', 'hitrate', 'rankscore',
    %              'precision', 'recall', 'personalization' or 'novelty'
    %   R          training matrix (M-by-N); used for its size and, for
    %              'novelty', for its column sums
    %   PR         matrix of predicted scores, indexed as PR(user,item)
    %   T          test matrix (m-by-n)
    %   index_i_n  struct array; index_i_n(u).id holds the item indices that
    %              user u has not rated in the training data
    %
    %   value      the metric averaged over the m rows of T.  For 'hitrate' it
    %              is a 1-by-n row vector (one entry per recommendation-list
    %              length) and the curve is also plotted.
    %
    %   Rows for which a metric cannot be computed (no candidate items, no
    %   test items) contribute 0 to the average.  An unknown metric name
    %   raises the error 'evaluate:unknownMetric'.
    function value = evaluate(opt,R,PR,T,index_i_n)
    [m,n] = size(T);
    [M,N] = size(R);
    numItems = size(PR,2);      % upper bound for any top-L list taken from PR
    %% Select the evaluation method
    switch (opt)

        %% Root-mean-square error over each user's uncollected items
        case 'RMSE'
            RMSE = zeros(1,m);
            for u = 1:m
                index_tmp = index_i_n(u).id;
                index_tmp( index_tmp > n ) = [];    % T may have fewer columns than R
                len = length(index_tmp);
                if len > 0                          % avoid 0/0 -> NaN poisoning the mean
                    vec = PR(u,index_tmp) - T(u,index_tmp);
                    RMSE(u) = sqrt( sum( vec .* vec ) / len );
                end
                if ~(mod(u,10))
                    fprintf('%d ',u);               % progress indicator
                end
            end
            value = sum(RMSE) / length(RMSE);
            fprintf('The RMSE is: %g',value);

        %% Pearson product-moment correlation between predicted and real scores
        % The coefficient lies in [-1,1]; values near +/-1 mean a strong linear
        % relation and 0 means none.
        case 'pcc'
            pcc = zeros(1,m);
            for u = 1:m
                index_tmp = index_i_n(u).id;
                index_tmp( index_tmp > n ) = [];
                len = length(index_tmp);

                predict = PR(u,index_tmp);
                real = T(u,index_tmp);
                mean_predict = sum(predict) / len;
                mean_real = sum(real) / length(real);

                vec1 = predict - mean_predict;
                vec2 = real - mean_real;
                sum1 = vec1 * vec1';
                sum2 = vec2 * vec2';
                % A constant (or empty) vector has zero variance; leave pcc(u) at 0.
                if ( sum1 ~= 0 ) && ( sum2 ~= 0 )
                    pcc(u) = vec1 * vec2' / sqrt( sum1 * sum2 );
                end
                if ~(mod(u,10))
                    fprintf('%d ',u);
                end
            end
            value = sum(pcc) / m;
            fprintf('The PCC is: %g',value);

        %% Hitting rate -- only meaningful for binary ("like"/"dislike") data
        case 'hitrate'
            [~,index_sr] = sort(PR,2,'descend');
            rato = zeros(m,n);
            for u = 1:m
                sumu = sum(T(u,:));
                if sumu ~= 0
                    % rato(u,rec) is the share of u's test items found among the
                    % first rec ranked items, so a running sum over the ranking
                    % yields every list length in one pass.
                    order = index_sr(u,1:n);
                    inTest = order <= n;            % ranked items that have no column in T count as misses
                    gain = zeros(1,n);
                    gain(inTest) = T(u,order(inTest));
                    rato(u,:) = cumsum(gain) / sumu;
                end
                if ~(mod(u,10))
                    fprintf('%d ',u);
                end
            end
            % Explicit dimension: plain sum() would collapse the single row when m == 1.
            value = sum(rato,1) / m;

            x = 1:length(value);
            plot(x,value,'--r');
            hold on;
            xlabel('length of recommendation list');
            ylabel('hitting rate');

        %% Average ranking score
        case 'rankscore'
            [~,index_sr] = sort(PR,2,'descend');
            % Preallocated so that the denominator below is always m, no matter
            % which users happen to have test items.
            rato = zeros(1,m);
            for u = 1:m
                len1 = length( index_i_n(u).id );
                index_i_t = find( T(u,:) == 1 );
                index_i_t( index_i_t > numItems ) = [];   % items with no column in PR cannot be ranked
                len2 = length( index_i_t );
                if ( len1 ~= 0 ) && ( len2 ~= 0 )
                    % Each row of index_sr is a permutation, so the second output
                    % of ismember is the position of every test item in the ranking.
                    [~,position] = ismember( index_i_t, index_sr(u,:) );
                    rato(u) = sum( position / len1 ) / len2;
                end
            end
            value = sum(rato) / m;
            fprintf('The average rank score is: %g ',value);

        %% Precision and precision enhancement
        case 'precision'
            L = min(10,numItems);       % list length, clamped to the number of items
            [~,index_sr] = sort(PR,2,'descend');
            list = index_sr(:,1:L);
            p = zeros(1,m);
            for u = 1:m
                index_i_t = find( T(u,:) == 1 );
                vec = intersect( index_i_t, list(u,:) );
                p(u) = numel(vec) / L;
            end
            value = sum(p) / m;
            % Ratio to the precision of a random list, sum(T(:)) / (M*N).
            ep = value * M * N / sum( sum(T) );
            fprintf('The precision is: %g ',value);
            fprintf('The precision enhancement is: %g ',ep);

        %% Recall and recall enhancement
        case 'recall'
            L = min(20,numItems);
            [~,index_sr] = sort(PR,2,'descend');
            list = index_sr(:,1:L);
            recall = zeros(1,m);        % users without test items keep recall 0
            for u = 1:m
                index_i_t = find( T(u,:) == 1 );
                relevant = sum( T(u,:) );
                if relevant ~= 0
                    recall(u) = sum( ismember( index_i_t, list(u,:) ) ) / relevant;
                end
            end
            value = sum(recall) / m;
            % A random list of L out of N items has expected recall L/N, so the
            % enhancement is scaled by the number of items, not of users.
            er = value * N / L;
            fprintf('The recall is: %g ',value);
            fprintf('The recall enhancement is: %g ',er);

        %% Personalization: mean pairwise distance between users' top-L lists
        case 'personalization'
            L = min(20,numItems);
            [~,index_sr] = sort(PR,2,'descend');
            list = index_sr(:,1:L);
            h = zeros(m,m);
            for u = 1:m
                % The diagonal stays 0 (a list fully overlaps itself), so only
                % the pairs above it need to be computed.
                for k = u+1:m
                    tmp = intersect( list(u,:), list(k,:) );
                    h(u,k) = 1 - length( tmp ) / L;
                    h(k,u) = h(u,k);
                end
            end
            value = sum( sum(h) ) / ( m^2 - m );
            fprintf('The personalization is: %g ',value);

        %% Novelty: mean of log2(M / item degree) over each top-L list
        case 'novelty'
            degree_i = sum( R,1 );
            L = min(20,numItems);
            [~,index_sr] = sort(PR,2,'descend');
            list = index_sr(:,1:L);
            I = zeros(1,m);
            for u = 1:m
                % Sum of logs instead of the log of a running product: same
                % value, without the risk of overflow or underflow.
                I(u) = sum( log2( M ./ degree_i( 1, list(u,:) ) ) ) / L;
            end
            value = sum(I) / m;
            fprintf('The novelty is: %g ',value);

        otherwise
            % Without this branch "value" would be left unassigned and the
            % caller would only see an obscure output-argument error.
            error('evaluate:unknownMetric','Unknown evaluation metric "%s".',opt);

    end
        
    -------------------------------------------------------------------------------------------------           

    %CF

    %% Data loading and preprocessing

    clear all;
    % Alternative data set (disabled):
    %data = load('E: etwork_papersdatasetsJesterjeste_train');
    %test = load('E: etwork_papersdatasetsJesterjester_test');
    % NOTE(review): the paths below look mangled (backslash sequences of a
    % Windows path appear to be missing) -- confirm before running.
    data = load('E: etwork_papersu1.base');
    test = load('E: etwork_papersu1.test');

    R = preprocess(data);
    T = preprocess(test);
    % Disabled alternative: binarise ready-made matrices and drop the users
    % that have no positive rating in the training data.
    %{
    R=data.train;
    R(R<3)=0;
    R(R>=3)=1;
    T=test.test;
    T(T<3)=0;
    T(T>=3)=1;
    du = sum(R,2);
    di = sum(R,1);
    ex=find(du==0);
    R(ex,:)=[];
    T(ex,:)=[];
    du(ex,:)=[];
    %}

    [M,N] = size(R);
    [m,n] = size(T);

    % For each user, the indices of the items with a zero entry in R.
    for u = 1:M
        index_i_n(u).id = find( R(u,:) == 0 );
    end

    %% Similarity between every user and all other users
    sim = get_Sim_u(R);

    %% Predict a score for every zero entry of R
    PR = zeros(M,N);
    for u = 1:M
        index_n = find( R(u,:) == 0 );
        for k = 1:length( index_n )
            item = index_n(k);
            PR( u, item ) = predict_Rate( u, item, sim, R );
        end
    end
    clear item;     % keep the workspace limited to the original variables

    %% Evaluate the predictions
    value = evaluate('precision',R,PR,T,index_i_n);
    hit = hitrate(PR,T,20);


     

    请尊重原创知识,本人非常愿意与大家分享 转载请注明出处:http://www.cnblogs.com/90zeng/ 作者:博客园-90Zeng
  • 相关阅读:
    图片上传-下载-删除等图片管理的若干经验总结3-单一业务场景的完整解决方案
    图片上传-下载-删除等图片管理的若干经验总结2
    HDU 1195 Open the Lock
    HDU 1690 Bus System
    HDU 2647 Reward
    HDU 2680 Choose the best route
    HDU 1596 find the safest road
    POJ 1904 King's Quest
    CDOJ 889 Battle for Silver
    CDOJ 888 Absurdistan Roads
  • 原文地址:https://www.cnblogs.com/90zeng/p/4121645.html
Copyright © 2011-2022 走看看