zoukankan      html  css  js  c++  java
  • 大数据之路【第十五篇】:数据挖掘--推荐算法

    import web
    import sys
    
    # Python 2 idiom: reload sys and switch the interpreter-wide default string
    # encoding to UTF-8.
    reload(sys)
    sys.setdefaultencoding('utf-8')
    
    # Make the local ./jieba/ directory importable for the jieba imports that follow.
    sys.path.append("./jieba/")
    
    import jieba
    import jieba.posseg
    import jieba.analyse
    
    # Routing table: alternating (URL pattern, handler class name) entries.
    # The names refer to the `index` and `test` classes defined in this file.
    urls = (
        '/', 'index',
        '/test', 'test',
    )
    
    # globals() is handed to web.py, presumably so it can resolve the handler
    # names listed in `urls` to the classes in this module.
    app = web.application(urls, globals())
    
    
    rec_map = {}
    with open('inverted.data', 'r') as fd:
        for line in fd:
        ss = line.strip().split('	')
        if len(ss) != 2:
            continue
        token = ss[0].strip().encode('utf8')
        music_rec_list_str = ss[1].strip()
    
        for music_score in music_rec_list_str.split(''):
            name, score = music_score.strip().split('')
            if token not in rec_map:
            rec_map[token] = []
            rec_map[token].append((name, round(float(score), 2)))
    
    
    print len(rec_map)
    
    class index:
        def GET(self):
        params = web.input()
        content = params.get('content', '')
        print 'content: ', content
    
        # for k, v in rec_map.items():
        #     if content == k:
        #         print k
        #         print v
        #         print '===='
    
        # if content not in rec_map.keys():
        #     return 'no found!'
        # else:
        #     tmp_list = []
        #     for tup in rec_map[content.encode('utf8')]:
        #         name, score = tup 
        #     print name
        #     tmp_list.append(name)
        #     return '
    '.join(tmp_list)
    
        seg_list = jieba.cut(content, cut_all=False)
    
        result_map = {}
        for seg in seg_list:
            print 'seg: ', seg
            if seg in rec_map.keys():
            print '1111111111'
            for name_score in rec_map[seg.encode('utf8')]:    
                tmp_name, score = name_score
                name = tmp_name.encode('utf8')    
                if name not in result_map:
                print '22222222'
                result_map[name] = score
                else:
                print '3333333'
                    old_score = result_map[name]
                new_score = old_score + score
                result_map[name] = new_score
    
        rec_list = []
        for k, v in result_map.items():
            rec_list.append('	'.join([k, str(v)]))
    
        return "
    ".join(rec_list)
    
    class test:
        def GET(self):
        print web.input()
        return '222'
    
    # Run the web.py application only when this file is executed as a script,
    # not when it is imported.
    if __name__ == "__main__":
        app.run()

    搜索MV推荐

     搜索周杰伦

  • 相关阅读:
    目前阻碍大数据成功的常见问题有哪些
    YII2 的授权(Authorization)
    CCBPM工作流引擎的消息机制与设计
    日志框架实现实时改动,实时生效,详细框架思路(2)
    manacher hihoCoder1032 最长回文子串
    C#.NEt-GDI+中的Pen測试
    Unity3D游戏开发最佳实践20技巧(一)
    Android init.rc文件浅析
    OBIEE开发手冊
    Android Jni层 创建 linux socket 出错问题解决
  • 原文地址:https://www.cnblogs.com/hackerer/p/11482159.html
Copyright © 2011-2022 走看看