各种相似度指标:https://www.cnblogs.com/arachis/p/Similarity.html
基于用户协同过滤
def getRecommendations(prefs,person,similarity=sim_pearson):
simSum = {}
totals = {}
for other in prefs:
if other == person: continue
sim = similarity(prefs,person,other)
if sim<=0: continue
for item in prefs[other]:
#只对自己未看过的电影进行评价
if item not in prefs[person] or prefs[person][item]==0:
# Similarity * Score加权评价值
totals.setdefault(item,0) #因为下面是+=,所以必须初始化
totals[item]+=prefs[other][item]*sim
# Sum of similarities 相似值之和
simSum.setdefault(item,0)
simSum[item]+=sim
rankings = [(totals[item]/simSum[item],item) for item in totals]
#或者 rankings = [(total/simSum[item],item) for item,total in totals.items()]
rankings.sort()
rankings.reverse()
return rankings
def transformPrefs(prefs):
result = {}
for person in prefs:
for item in prefs[person]:
result.setdefault(item, {})
# 将物品和人员对调
result[item][person] = prefs[person][item]
return result
def calculateSimilarItems(prefs,n=10):
result={} # 建立字典,以给出与这些物品最为相近的其他物品
itemPrefs=transformPrefs(prefs) # 以物品为中心对偏好矩阵实施倒置处理
for item in itemPrefs: # 寻找与item最为相近的n个物品
scores=topMatches(itemPrefs,item,n=n,similarity=sim_distance)
result[item]=scores
return result