zoukankan      html  css  js  c++  java
  • 基于物品的协同过滤 Item-Based CF(使用 jaccard_score,即 Jaccard 相似度,作为评价标准)

    
    
    import  pandas as pd
    import  pprint
    import  numpy as np
    
    # Row labels (users) and column labels (items) of the purchase matrix.
    users = ['user{}'.format(n) for n in range(1, 6)]
    items = ['item {}'.format(letter) for letter in 'ABCDE']

    # One row per user, one column per item; 1 marks a purchase, 0 marks none.
    datasets = [
        [1, 0, 1, 1, 0],  # user1
        [1, 0, 0, 1, 1],  # user2
        [1, 0, 1, 0, 0],  # user3
        [0, 1, 0, 1, 1],  # user4
        [1, 1, 1, 0, 1],  # user5
    ]

    # Label the raw matrix so rows can be addressed by user and columns by item.
    df = pd.DataFrame(data=datasets, index=users, columns=items)

    print(df)
    
    # NOTE: `jaccard_similarity_score` was deprecated in scikit-learn 0.21 and
    # removed in 0.23, so importing it raises ImportError on current releases.
    # `jaccard_score` is its replacement and the only function used here.
    from sklearn.metrics import jaccard_score

    # Jaccard similarity between the purchase columns of item A and item B,
    # treating each column as a binary label vector over the users.
    sim = jaccard_score(df['item A'], df['item B'])
    print(sim)
    
    
    from sklearn.metrics.pairwise import pairwise_distances

    # The Jaccard metric operates on boolean vectors. Cast the 0/1 matrix once,
    # explicitly, so scikit-learn does not have to convert it on every call
    # (the implicit conversion emits a DataConversionWarning).
    purchase_matrix = df.values.astype(bool)

    # pairwise_distances yields Jaccard *distances*; similarity = 1 - distance.
    # Rows of the matrix are users, so this is the user-user similarity.
    user_similar = 1 - pairwise_distances(purchase_matrix, metric='jaccard')
    user_similar = pd.DataFrame(user_similar, columns=users, index=users)
    print(user_similar)

    # Transposing puts items on the rows, giving the item-item similarity.
    items_similar = 1 - pairwise_distances(purchase_matrix.T, metric='jaccard')
    items_similar = pd.DataFrame(items_similar, columns=items, index=items)
    print(items_similar)
    
    
    # Map every item to the labels of its two most similar *other* items.
    topN_items = {}
    for item_name in items_similar.index:
        # Remove the item's own entry so it cannot be picked as its own neighbour.
        neighbours = items_similar.loc[item_name].drop([item_name])
        print(neighbours)
        # Highest similarity first; the first two labels are the neighbours kept.
        ranked = neighbours.sort_values(ascending=False)
        topN_items[item_name] = list(ranked.index[:2])

    print('Top 2 相似物品:')
    print(topN_items)
    
    
    # For each user, gather the top similar items of everything they bought,
    # then discard the items they already own.
    rs_results = {}

    for user in df.index:
        # Items the user has already purchased: zeros become NaN and are dropped,
        # leaving only the labels of the non-zero entries.
        owned = df.loc[user].replace(0, np.nan).dropna().index
        candidates = set()
        for item in owned:
            # Seed the recommendation with the most similar items of each purchase.
            candidates.update(topN_items[item])
        # Filter out what the user has already bought.
        rs_results[user] = candidates - set(owned)
    print('最终推荐结果:')
    print(rs_results)
    
    
    


    最终推荐结果:
    {'user1': {'item B', 'item E'}, 'user2': {'item C', 'item B'}, 'user3': {'item B', 'item E'}, 'user4': {'item A'}, 'user5': {'item D'}}





  • 相关阅读:
    消息队列介绍
    SpringBoot随笔-SpringBoot集成Druid
    Redis-Redis基本类型及使用Java操作
    信息安全
    计算机网络基础
    多媒体技术
    数据库基础
    程序设计基础
    计算机软件体系
    计算机硬件体系
  • 原文地址:https://www.cnblogs.com/kpwong/p/13600362.html
Copyright © 2011-2022 走看看