zoukankan      html  css  js  c++  java
  • 用户标签

    代码:用户标签

    通过标签将用户和物品联系起来

    标签的作用:
    1.物品相关:物品的属性(时间,创作者等)
    2.用户相关:用户对物品的看法、任务(待读等)
     
    算法:
    1.简单算法(推荐用户常用标签下的热门物品)
      计算用户对物品的喜好
      $p(u,i)=\sum_b \frac{n_{u,b}}{\log(1+n_b^{(u)})}\frac{n_{i,b}}{\log(1+n_i^{(u)})}$
      
    # coding=gbk
    import pandas as pd
    import math
    
    # Load the delimited, header-less records; each row is unpacked below as
    # (user, item, tags). The separator is written as an escape so that it
    # cannot be silently converted to spaces by an editor.
    data=pd.read_csv('delicious.dat',sep='\t',header=None)
    
    class SimpleTagBased:
        """Tag-based recommender: suggest items popular under a user's tags.

        The constructor scans (user, item, tags) rows once and builds these
        per-instance indexes:

        * ``user_tag``   -- {user: {tag: times the user applied the tag}}
        * ``user_item``  -- {user: [items the user tagged]}; used to filter
          already-seen items out of the recommendations
        * ``tag_item``   -- {tag: {item: times the item received the tag}}
        * ``tag_count``  -- {tag: total number of times the tag was applied}
        * ``item_count`` -- {item: number of rows (tagging actions) on the item}
        * ``item_tag``   -- {item: {tag: times the item received the tag}}
        """

        def __init__(self,data):
            """Build the indexes from ``data``.

            ``data`` must provide ``itertuples(index=False)`` yielding
            3-tuples ``(user, item, tags)`` where ``tags`` is a
            space-separated string. Rows whose ``tags`` value is a float
            (a missing value read as NaN) are skipped.
            """
            # The indexes live on the instance: as class attributes they
            # would be shared (and keep accumulating) across all instances.
            self.user_tag = {}
            self.user_item = {}
            self.tag_item = {}
            self.tag_count = {}
            self.item_count = {}
            self.item_tag = {}

            for user, item, tags in data.itertuples(index=False):
                if isinstance(tags, float):
                    continue

                self.user_item.setdefault(user, []).append(item)
                self.item_count[item] = self.item_count.get(item, 0) + 1
                tags_of_item = self.item_tag.setdefault(item, {})
                tags_of_user = self.user_tag.setdefault(user, {})

                for tag in tags.split(' '):
                    # Tags are compared case-insensitively.
                    tag = tag.lower()
                    tags_of_user[tag] = tags_of_user.get(tag, 0) + 1
                    items_of_tag = self.tag_item.setdefault(tag, {})
                    items_of_tag[item] = items_of_tag.get(item, 0) + 1
                    self.tag_count[tag] = self.tag_count.get(tag, 0) + 1
                    tags_of_item[tag] = tags_of_item.get(tag, 0) + 1

        @staticmethod
        def _top_keys(scores, limit):
            """Return up to ``limit`` keys of ``scores``, highest value first."""
            # key= works on both Python 2 and 3; the positional cmp form does
            # not exist on Python 3. Ties keep their original relative order.
            ordered = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
            return [key for key, _ in ordered[:limit]]

        def recommend(self,user):
            """Recommend up to five unseen items for ``user``.

            Returns a tuple ``(userdesc, res)``: ``userdesc`` is the user's
            ten most-used tags, and ``res`` holds, for each recommended item
            (best first), that item's ten most frequent tags.

            Raises ``KeyError`` if ``user`` has no tagged rows.
            """
            # A set makes the "already seen" test O(1) inside the inner loop.
            viewed = set(self.user_item[user])
            utags = self.user_tag[user]

            rank = {}
            for tag, weight in utags.items():
                for item, wt in self.tag_item[tag].items():
                    if item in viewed:
                        continue
                    # Preference of the user for the item: both counts are
                    # damped by the log-popularity of the tag and of the item.
                    rank[item] = rank.get(item, 0) + (
                        weight*1.0/math.log(1+self.tag_count[tag])
                        *wt/math.log(1+self.item_count[item]))

            # Each recommended item is described by its 10 most frequent tags.
            res = [self._top_keys(self.item_tag[item], 10)
                   for item in self._top_keys(rank, 5)]
            # The user's interests are described by their 10 most-used tags.
            userdesc = self._top_keys(utags, 10)
            return (userdesc,res)
    # Build the recommender from the loaded records and show the result for
    # user 104: first the user's most-used tags, then the tag lists of the
    # recommended items. print(...) with one argument runs on Python 2 and 3.
    stb = SimpleTagBased(data=data)
    userdesc,res = stb.recommend(104)
    print(userdesc)
    print(res)

    结果:

    --用户常用标签

    ['software', 'webdesign', 'tools', 'dev', 'howto', 'free', 'freeware', 'opensource', 'reference', 'linux']

    --所推荐物品具有的标签
    ['css', 'webdesign', 'reference', 'design', 'web', 'development', 'html', 'tools', 'webdev', 'programming'],
    ['webdesign', 'templates', 'design', 'css', 'opensource', 'web', 'free', 'html', 'layout', 'template'],
    ['fonts', 'typography', 'webdesign', 'design', 'tools', 'css', 'web', 'font', 'type', 'reference'],
    ['opensource', 'software', 'freeware', 'linux', 'free', 'windows', 'tools', 'reference', 'download', 'alternative'],
    ['freeware', 'software', 'utilities', 'tools', 'free', 'reference', 'list', 'windows', 'download', 'opensource']

      改进:
      A.打过标签少的用户(生成相似标签)
        标签相似度:(同一物品下的标签相似,两标签同时出现在不同物品下,认为标签相似度高)
        使用余弦相似度度量:$(n_{b,1},n_{b,2},n_{b,3},...,n_{b,n})$各量为物品i被打上标签b的次数
    # coding=gbk
    import pandas as pd
    import math
    
    # Load the delimited, header-less records; the separator is written as an
    # escape so it cannot be silently converted to spaces by an editor.
    data=pd.read_csv('delicious.dat',sep='\t',header=None)

    # item_tag[item][tag] = number of times `item` received `tag`.
    item_tag=dict()
    for user,item,tags in data.itertuples(index=False):
        # A missing tags field arrives as NaN (a float): skip that row.
        if isinstance(tags, float):
            continue

        tag_counts = item_tag.setdefault(item, {})
        for tag in tags.split(' '):
            # Tags are compared case-insensitively.
            tag = tag.lower()
            tag_counts[tag] = tag_counts.get(tag, 0) + 1
    
    def recommend(taga,n,item_tag):
        """Return the ``n`` tags most similar to ``taga``, most similar first.

        Every tag is treated as a vector indexed by item, whose components
        are the counts in ``item_tag`` ({item: {tag: count}}). Similarity is
        the cosine between the vector of ``taga`` and that of each tag that
        shares at least one item with it. If ``taga`` shares no item with any
        other tag the result is an empty list.

        Side effect: prints the fraction of items processed, one line per item.
        """
        nb = {}    # squared norm of every other tag's vector
        nab = {}   # dot product of taga's vector with each co-occurring tag
        na = 0     # squared norm of taga's vector

        total = len(item_tag)
        for done, tags in enumerate(item_tag.values(), 1):
            # Progress output; print(...) with one argument runs on Python 2 and 3.
            print(done*1.0/total)

            av = tags.get(taga)
            if av is not None:
                na += av*av
            for tag, v in tags.items():
                if tag == taga:
                    continue
                nb[tag] = nb.get(tag, 0) + v*v
                # Only items that carry taga contribute to the dot product.
                if av is not None:
                    nab[tag] = nab.get(tag, 0) + av*v

        na = math.sqrt(na)
        rank = dict((tag, v/na/math.sqrt(nb[tag])) for tag, v in nab.items())

        # key= replaces the Python-2-only positional cmp argument.
        ordered = sorted(rank.items(), key=lambda kv: kv[1], reverse=True)
        return [tag for tag, _ in ordered[0:n]]
    
    # Show the 10 tags most similar to 'webdesign'.
    # print(...) with one argument runs on Python 2 and 3.
    tags=recommend('webdesign',10,item_tag)
    print(tags)

    结果:和webdesign相关的标签

    ['design', 'css', 'web', 'webdev', 'html', 'web_design', 'inspiration', 'xhtml', 'webdevelopment', 'resources']

    2.基于图的算法
      
     解法参看:概率图模型
    给用户推荐标签:
      对于物品i给用户u推荐标签,标签$b_k$的推荐度为:
      $(1-\alpha)\frac{n_{u,b_k}}{\max_j(n_{u,b_j})}+\alpha\frac{n_{i,b_k}}{\max_j(n_{i,b_j})}$
    # coding=gbk
    import pandas as pd
    import math
    
    # Load the delimited, header-less records; the separator is written as an
    # escape so it cannot be silently converted to spaces by an editor.
    data=pd.read_csv('delicious.dat',sep='\t',header=None)

    # item_tag[item][tag] = number of times `item` received `tag`.
    item_tag=dict()
    # user_tag[user][tag] = number of times `user` applied `tag`.
    user_tag=dict()
    for user,item,tags in data.itertuples(index=False):
        # A missing tags field arrives as NaN (a float): skip that row.
        if isinstance(tags, float):
            continue

        tags_of_item = item_tag.setdefault(item, {})
        tags_of_user = user_tag.setdefault(user, {})

        for tag in tags.split(' '):
            # Tags are compared case-insensitively.
            tag = tag.lower()
            tags_of_item[tag] = tags_of_item.get(tag, 0) + 1
            tags_of_user[tag] = tags_of_user.get(tag, 0) + 1
    
    def recommend(user,item,user_tag,item_tag,alpha):
        """Suggest tags for ``user`` to put on ``item``.

        Each tag is scored as
        ``(1-alpha) * n_user_tag / max(n_user) + alpha * n_item_tag / max(n_item)``,
        i.e. a blend of how often the user applies the tag and how often the
        item receives it, each normalised by the respective maximum count.

        ``user_tag`` is {user: {tag: count}} and ``item_tag`` is
        {item: {tag: count}}. Returns ``(udesc, idesc, res)``: the user's 10
        most-used tags, the item's 10 most frequent tags, and the 10
        best-scoring tags. Raises ``KeyError`` for an unknown user or item.
        """
        def top10(scores):
            # key= replaces the Python-2-only positional cmp argument.
            ordered = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)
            return [tag for tag, _ in ordered[0:10]]

        utgs = user_tag[user]
        itgs = item_tag[item]

        udesc = top10(utgs)
        idesc = top10(itgs)

        # float() keeps the normalisation a true division on Python 2 even
        # when alpha is passed as an int (e.g. 0 or 1).
        maxu = float(max(utgs.values()))
        maxi = float(max(itgs.values()))

        rank = dict()
        for tag, v in utgs.items():
            rank[tag] = rank.get(tag, 0) + (1-alpha)*v/maxu
        for tag, v in itgs.items():
            rank[tag] = rank.get(tag, 0) + alpha*v/maxi

        res = top10(rank)
        return udesc,idesc,res
    
    # Suggest tags for user 104 on item 33911, weighting the item's own tags
    # at 0.8, then show the user's tags, the item's tags and the suggestions.
    # print(...) with one argument runs on Python 2 and 3.
    udesc,idesc,res=recommend(104,33911,user_tag,item_tag,0.8)
    print(udesc)
    print(idesc)
    print(res)

    结果:

    --用户常用标签

    ['software', 'webdesign', 'tools', 'dev', 'howto', 'free', 'freeware', 'opensource', 'reference', 'linux']

    --物品常被打标签
    ['web', 'softwareagents', 'java', 'howto', 'moviles', 'documentation', 'semantica', 'hpi', 'api', 'agents']

    --推荐标签
    ['howto', 'web', 'moviles', 'softwareagents', 'hpi', 'agents', 'api', 'jade', 'agentes', 'java']

  • 相关阅读:
    gitlab: git clone/pull / push: The project you were looking for could not be found
    转载: MySQL启动出错InnoDB: Check that you do not already have another mysqld process解决方法
    root用户删除文件,提示:Operation not permitted
    使用dockerfile打包新镜像
    kubernets创建Deployment
    代理全家福
    Spring事务传播详解
    [FFmpeg]Centos7 yum安装
    [Redis]存放字典
    [Docker]开放2375端口
  • 原文地址:https://www.cnblogs.com/porco/p/4452435.html
Copyright © 2011-2022 走看看