zoukankan      html  css  js  c++  java
  • Understanding matrix factorization for recommendation

    http://nicolas-hug.com/blog/matrix_facto_4

    import numpy as np
    import surprise  # run 'pip install scikit-surprise' to install surprise
    from surprise.model_selection import cross_validate
    
    
    class MatrixFacto(surprise.AlgoBase):
        '''A basic rating prediction algorithm based on matrix factorization.

        Each user u gets a factor vector p_u and each item i a factor vector
        q_i; the predicted rating is the dot product of the two. The vectors
        are learned with plain (unregularized) stochastic gradient descent on
        the squared prediction error.
        '''

        def __init__(self, learning_rate, n_epochs, n_factors):
            '''Store the SGD hyper-parameters.

            Args:
                learning_rate: step size used for every SGD update.
                n_epochs: number of full passes over the training ratings.
                n_factors: length of each user / item factor vector.
            '''
            # Initialise the base class first so that whatever state it sets
            # up is present on the instance.
            surprise.AlgoBase.__init__(self)

            self.lr = learning_rate  # learning rate for SGD
            self.n_epochs = n_epochs  # number of iterations of SGD
            self.n_factors = n_factors  # number of factors

        def fit(self, trainset):
            '''Learn the vectors p_u and q_i with SGD.

            Args:
                trainset: the training set; it must provide n_users, n_items
                    and all_ratings() yielding (u, i, r_ui) triples.

            Returns:
                self, so that calls can be chained.
            '''
            # Let the base class record the trainset (estimate() reads
            # self.trainset later on).
            surprise.AlgoBase.fit(self, trainset)

            print('Fitting data with SGD...')

            # Randomly initialize the user and item factors.
            p = np.random.normal(0, .1, (trainset.n_users, self.n_factors))
            q = np.random.normal(0, .1, (trainset.n_items, self.n_factors))

            # SGD procedure
            for _ in range(self.n_epochs):
                for u, i, r_ui in trainset.all_ratings():
                    err = r_ui - np.dot(p[u], q[i])
                    # Compute both steps from the *current* vectors before
                    # touching either of them, so that the update of q_i uses
                    # the previous (non-updated) value of p_u. The products
                    # below allocate new arrays, so they do not alias p or q.
                    step_p = self.lr * err * q[i]
                    step_q = self.lr * err * p[u]
                    p[u] += step_p
                    q[i] += step_q

            self.p, self.q = p, q
            self.trainset = trainset

            return self

        def estimate(self, u, i):
            '''Return the estimated rating of user u for item i.

            Args:
                u: user id as understood by trainset.knows_user.
                i: item id as understood by trainset.knows_item.

            Returns:
                The scalar product between p_u and q_i if both the user and
                the item are known to the trainset, else the average of all
                ratings (trainset.global_mean).
            '''
            if self.trainset.knows_user(u) and self.trainset.knows_item(i):
                return np.dot(self.p[u], self.q[i])
            else:
                return self.trainset.global_mean
    
    
    # Load the MovieLens 100k ratings (https://grouplens.org/datasets/movielens/100k/).
    # The dataset will be downloaded automatically.
    data = surprise.Dataset.load_builtin('ml-100k')

    # Hyper-parameters handed to the matrix factorization algorithm.
    hyperparams = {
        'learning_rate': .01,
        'n_epochs': 10,
        'n_factors': 10,
    }
    algo = MatrixFacto(**hyperparams)

    # Evaluate with 5-fold cross validation, reporting RMSE and MAE.
    cross_validate(
        algo,
        data,
        measures=['RMSE', 'MAE'],
        cv=5,
        verbose=True,
    )
  • 相关阅读:
    Flex 和JavaScript 交互(带参)
    dos创建快捷方式
    IE的onunload事件里调用DWR
    使用VS2008开发OPC客户端程序
    实时库GEHistorian的一些问题记录
    SQL SERVER中,把一个表中的数据导入到另一个表中
    SQLSERVER 触发器
    SqlServer数据复制出现的问题与处理
    了解Log.Debug 或 log4net
    VS2008应用程序部署时将Framework3.5打包到安装包中
  • 原文地址:https://www.cnblogs.com/TMatrix52/p/11636698.html
Copyright © 2011-2022 走看看