zoukankan      html  css  js  c++  java
  • LFM python 实现

    最近参加一个推荐算法比赛,想试一下LFM 来提高预测精度。自己尝试写了一个LFM 实现。

    数据规模比较大时,性能较差。浮点运算会有超精度的情况。建议还是应该使用libfm.


    参考公式 :

    损式函数

    梯度公式:

    采用随机梯度下降

    # coding:utf-8
    import pandas as pd
    import numpy as np
    import math
    import operator
    import time
    import random
    
    
    def initpara(users, items, F):
        p = dict()
        q = dict()
    
        for userid in users:
            p[userid] = [(-1 + 2 * random.random()) for f in range(0, F)]  # / math.sqrt(F)
    
        for itemid in items:
            q[itemid] = [(-1 + 2 * random.random()) for f in range(0, F)]  # / math.sqrt(F)
    
        return p, q
    
    
    def initsamples(user_items):
        user_samples = []
        items_pool = []
        for userid, items in user_items.items():
            for item in items:
                items_pool.append(item)
    
        for userid, items in user_items.items():
            samples = dict()
            for itemid, score in items.items():
                if score != 0:
                    samples[itemid] = score
            user_samples.append((userid, samples))
    
        return user_samples
    
    
    def initmodel(user_items, users, items, F):
        p, q = initpara(users, items, F)
        user_samples = initsamples(user_items)
    
        return p, q, user_samples
    
    
    def predict(userid, itemid, p, q):
        a = sum(p[userid][f] * q[itemid][f] for f in range(0, len(p[userid])))
        return a
    
    
    def lfm(user_items, users, items, F, N, alpha, lamda):
        '''
        LFM计算参数 p,q
        :param user_items: user_items
        :param users: users
        :param items: items
        :param F: 隐类因子个数
        :param N: 迭代次数
        :param alpha: 步长
        :param lamda: 正则化参数
        :return: p,q
        '''
        p, q, user_samples = initmodel(user_items, users, items, F)
    
        debugid1 = 0
        debugid2 = 0
        for step in range(0, N):
            random.shuffle(user_samples)  # 乱序
    
            error = 0
            count = 0
            for userid, samples in user_samples:
                for itemid, rui in samples.items():
                    pui = predict(userid, itemid, p, q)
                    eui = rui - pui
                    count += 1
                    error += math.pow(eui, 2)
                    '''debug'''
                    if userid == 1:
                        if debugid1 == 0 and rui == 1:
                            debugid1 = itemid
                        if debugid2 == 0 and rui == -1:
                            debugid2 = itemid
    
                    if userid == 1 and itemid == debugid1:
                        print debugid1, rui, pui, eui, alpha
                    if userid == 1 and itemid == debugid2:
                        print debugid2, rui, pui, eui, alpha
    
                    '''debug end'''
    
                    for f in range(0, F):
                        p_u = p[userid][f]
                        q_i = q[itemid][f]
    
                        p[userid][f] += alpha * (eui * q_i - lamda * p_u)
                        q[itemid][f] += alpha * (eui * p_u - lamda * q_i)
    
            rmse = math.sqrt(error / count)
            print  "rmse:", rmse
            alpha *= 0.9
        return p, q
    
    
    def predictlist(userid, items, p, q):
        predict_score = dict()
        for itemid in items:
            p_score = predict(userid, itemid, p, q)
            predict_score[itemid] = p_score
    
        return predict_score
    
    
    def recommend():
        print 'start'
        user_items = {1: {'a': 1, 'b': -1, 'c': -1, 'd': -1, 'e': 1, 'f': 1, 'g': -1},
                      2: {'a': -1, 'b': 1, 'c': -1, 'd': 1, 'e': 1, 'f': 1, 'g': 1},
                      3: {'a': 1, 'b': -1, 'c': 0, 'd': -1, 'e': -1, 'f': -1, 'g': 1},
                      4: {'a': 1, 'b': -1, 'c': -1, 'd': 0, 'e': 1, 'f': 1, 'g': 1},
                      5: {'a': -1, 'b': 1, 'c': 1, 'd': 1, 'e': -1, 'f': -1, 'g': 0},
                      6: {'a': 1, 'b': 0, 'c': -1, 'd': -1, 'e': 1, 'f': -1, 'g': -1}}
        users = {1, 2, 3, 4, 5, 6}
        items = {'a', 'b', 'c', 'd', 'e', 'f', 'g'}
        F = 5
        N = 30
        alpha = 0.3
        lamda = 0.03
        p, q = lfm(user_items, users, items, F, N, alpha, lamda)
    
        for userid, itemdic in user_items.items():
            print userid
            print "target", itemdic
            predict_score = predictlist(userid, itemdic, p, q)
            print  "predicted", predict_score
    
        print 'end'
    
    
    if __name__ == "__main__":
        recommend()

    运行结果

  • 相关阅读:
    百万级数据迁移方案测评小记
    EFCore-一对一配置外键小记2
    mpvue实战-手势滑动导航栏
    React-Native WebView使用本地js,css渲染html
    Dubbo测试环境服务调用隔离这么玩对么
    Kitty中的动态线程池支持Nacos,Apollo多配置中心了
    嘘!异步事件这样用真的好么?
    一时技痒,撸了个动态线程池,源码放Github了
    熬夜之作:一文带你了解Cat分布式监控
    这个Maven依赖的问题,你敢说你没遇到过
  • 原文地址:https://www.cnblogs.com/zrhai/p/6934398.html
Copyright © 2011-2022 走看看