zoukankan      html  css  js  c++  java
  • simrank python实现

    1、数据

    pc,hp.com
    pc,hp.com
    pc,hp.com
    pc,hp.com
    pc,hp.com
    pc,hp.com
    pc,hp.com
    pc,hp.com
    pc,hp.com
    pc,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,hp.com
    camera,bestbuy.com
    camera,bestbuy.com
    camera,bestbuy.com
    camera,bestbuy.com
    camera,bestbuy.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,hp.com
    digital camera,bestbuy.com
    digital camera,bestbuy.com
    digital camera,bestbuy.com
    digital camera,bestbuy.com
    digital camera,bestbuy.com
    digital camera,bestbuy.com
    digital camera,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    tv,bestbuy.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,teleflora.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    flower,orchids.com
    View Code

    2、simrank 的python实现

    import numpy as np 
    from numpy import matrix
    
    with open('sample1 (1).txt','r') as log_fp:
        logs = [log.strip() for log in log_fp.readlines()]
        # print(logs)
    logs_tuple = [tuple(log.split(",")) for log in logs]
    # print (logs_tuple)
    
    queries = list(set([log[0] for log in logs_tuple]))
    # print(queries)    #['digital camera', 'flower', 'pc', 'camera', 'tv']
    ads = list(set([log[1] for log in logs_tuple]))
    # print(ads)#['hp.com', 'teleflora.com', 'bestbuy.com', 'orchids.com']
    
    graph = np.matrix(np.zeros([len(queries),len(ads)]))
    # print(graph)   #6行4列的0矩阵
    
    for log in logs_tuple:
        query = log[0]
        ad = log[1]
        q_i = queries.index(query)
        a_j = ads.index(ad)
        graph[q_i,a_j] +=1
    print(graph)
    
    query_sim = matrix(np.identity(len(queries)))
    print(query_sim)
    ad_sim = matrix(np.identity(len(ads)))
    print(ad_sim)
    
    def get_ads_num(query):
        q_i = queries.index(query)
        return graph[q_i]
    
    def get_queries_num(ad):
        a_j = ads.index(ad)
        return graph.transpose()[a_j]
    
    def get_ads(query):
        series = get_ads_num(query).tolist()[0]
        return [ads[x] for x in range(len(series)) if series[x] > 0]
    
    def get_queries(ad):
        series = get_queries_num(ad).tolist()[0]
        return [queries[x] for x in range(len(series)) if series[x] > 0]
    
    
    def query_simrank(q1,q2,c):
        if q1 == q2 :
            return 1
        prefix = c/(get_ads_num(q1).sum() *get_ads_num(q2).sum())
        postfix = 0
        for ad_i in get_ads(q1):
            for ad_j in get_ads(q2):
                i = ads.index(ad_i)
                j = ads.index(ad_j)
                postfix += ad_sim[i,j]
        return prefix*postfix
    
    
    def ad_simrank(a1,a2,c):
        if a1 == a2 :
            return 1
        prefix = c/(get_queries_num(a1).sum()*get_queries_num(a2).sum())
        postfix = 0
        for query_i in get_queries(a1):
            for query_j in get_queries(a2):
                i = queries.index(query_i)
                j = queries.index(query_j)
                postfix += query_sim[i,j]
        return prefix*postfix
    
    
    def simrank(c=0.8,times = 1):
        global query_sim,ad_sim
    
        for run in range(times):
            new_query_sim = matrix(np.identity(len(queries)))
            for qi in queries:
                for qj in queries:
                    i = queries.index(qi)
                    j = queries.index(qj)
                    new_query_sim[i,j] =query_simrank(qi,qj,c)
    
            new_ad_sim = matrix(np.identity(len(ads)))
            for ai in ads:
                for aj in ads :
                    i = ads.index(ai)
                    j = ads.index(aj)
                    new_ad_sim[i,j] =ad_simrank(ai,aj,c)
    
            query_sim = new_query_sim
            ad_sim = new_ad_sim
    
    
    if __name__ == '__main__':
        print (queries)
        print(ads)
        simrank()
        print(query_sim)
        print(ad_sim)

    [[15.  0.  0.  0.]
     [ 0.  0. 10.  0.]
     [ 5.  0. 20.  0.]
     [ 7.  0. 30.  0.]
     [ 0. 16.  0. 15.]]
    [[
    1. 0. 0. 0. 0.] [0. 1. 0. 0. 0.] [0. 0. 1. 0. 0.] [0. 0. 0. 1. 0.] [0. 0. 0. 0. 1.]]
    [[
    1. 0. 0. 0.] [0. 1. 0. 0.] [0. 0. 1. 0.] [0. 0. 0. 1.]]
    [
    'tv', 'pc', 'camera', 'digital camera', 'flower']
    [
    'bestbuy.com', 'teleflora.com', 'hp.com', 'orchids.com']
    [[
    1. 0. 0.00213333 0.00144144 0. ] [0. 1. 0.0032 0.00216216 0. ] [0.00213333 0.0032 1. 0.00172973 0. ] [0.00144144 0.00216216 0.00172973 1. 0. ] [0. 0. 0. 0. 1. ]]
    [[
    1.00000000e+00 0.00000000e+00 9.87654321e-04 0.00000000e+00] [0.00000000e+00 1.00000000e+00 0.00000000e+00 3.33333333e-03] [9.87654321e-04 0.00000000e+00 1.00000000e+00 0.00000000e+00] [0.00000000e+00 3.33333333e-03 0.00000000e+00 1.00000000e+00]]
  • 相关阅读:
    fiddler强大功能用法(二)
    fidder强大功能用法(一)
    fidder使用
    postman
    bug的一生:如何体现测试专业度?
    Fiddler无法抓到https的解决方法
    Fiddler工具安装下载使用
    z-index
    position:absolute
    ajax跨域,json,jsonp
  • 原文地址:https://www.cnblogs.com/spp666/p/11821700.html
Copyright © 2011-2022 走看看