zoukankan      html  css  js  c++  java
  • TensorFlow(七) 地址匹配

    import tensorflow as tf
    import numpy as np
    import string,random
    
    n=10
    #街道名称
    street_names=['abbey','baker','canal','donner','elm']
    #街道类型
    street_types=['rd','st','ln','pass','ave']
    #邮编
    rand_zips=[ random.randint(65000,65999) for i in range(5) ]
    #门牌号码
    numbers=[random.randint(1,9999) for i in range(n)]
    #街道名称
    streets=[random.choice(street_names) for i in range(n)]
    #街道类型
    street_stuffs=[random.choice(street_types) for i in range(n)]
    #邮编
    zips=[random.choice(rand_zips) for i in range(n)]
    # '4905 baker rd', '6417 canal st'
    full_streets=[str(x)+' '+y+' '+z for x,y,z in zip(numbers,streets,street_stuffs)]
    #['8758 elm st', 65469], ['3237 donner rd', 65479] shape=(10,2)
    ref_data=[list(x) for x in zip(full_streets,zips)]
    
    #生成错误随机数据
    def create_type(s):
        rand_ind=random.choice(range(len(s)))
        s=list(s)
        s[rand_ind]=random.choice(string.ascii_lowercase)
        s=''.join(s)
        return s
    #错误街道名
    type_streets=[ create_type(x) for x in streets]
    type_full_streets=[str(x)+' '+y+' '+z for x,y,z in zip(numbers,type_streets,street_stuffs)]
    #['8758 efm st', 65469], ['3237 donnzr rd', 65479] shape=(10,2)
    test_data=[list(x) for x in zip(type_full_streets,zips)]
    #声明变量 占位符
    sess=tf.Session()
    test_address=tf.sparse_placeholder(dtype=tf.string)
    test_zip=tf.placeholder(shape=[None,1],dtype=tf.float32)
    ref_address=tf.sparse_placeholder(dtype=tf.string)
    ref_zip=tf.placeholder(shape=[None,n],dtype=tf.float32)
    
    #邮编距离 shape=(1,10) target=(0,无穷尽) 当为0时 相似度为一
    zip_dist=tf.square(tf.subtract(ref_zip,test_zip))
    #地址距离 返回(n,1)  (错误的个数 归一化)
    address_dist=tf.edit_distance(test_address,ref_address,normalize=True)
    
    #计算相似度 (0-1)
    zip_max=tf.gather(tf.squeeze(zip_dist),tf.argmax(zip_dist,1))
    zip_min=tf.gather(tf.squeeze(zip_dist),tf.argmin(zip_dist,1))
    #(1,10)
    zip_sim=tf.div(tf.subtract(zip_max,zip_dist),tf.subtract(zip_max,zip_min) )
    #(10,1)
    address_sim=tf.subtract(1.,address_dist)
    
    #加权平均
    address_weight=0.5
    zip_weight=1-address_weight
    
    #shape=(1,10)
    weight_sim=tf.add(tf.transpose(tf.multiply(address_weight,address_sim)),tf.multiply(zip_weight,zip_sim) )
    #获取最大索引
    top_match_index=tf.arg_max(weight_sim,1)
    
    #地址转换成稀疏向量
    def sparese_word_vec(word):
        #['abbey', 'baker']
        num_words=len(word)
        indcies=[[xi,0,yi] for xi,x in enumerate(word) for yi,y in enumerate(x) ]
        chars=list(''.join(word))
        return (tf.SparseTensorValue(indcies,chars,[num_words,1,1]))
    
    #获取标准地址
    referece_address=np.array([x[0] for x in ref_data])
    #(1,10)
    referece_zips=np.array( [[x[1] for x in ref_data]])
    
    #转换
    sparse_ref_set=sparese_word_vec(referece_address)
    
    for i in range(n):
        test_address_entry=test_data[i][0]
        test_zip_entry=[[test_data[i][1]]]
    
        test_address_repeat=[test_address_entry]*n
        sparse_test_set=sparese_word_vec(test_address_repeat)
    
        feeddict={test_address:sparse_test_set,test_zip:test_zip_entry,ref_address:sparse_ref_set,ref_zip:referece_zips }
        #获取最大相似度索引
        best_match=sess.run(top_match_index,feed_dict=feeddict)
        best_street=referece_address[best_match]
        [best_zip]=referece_zips[0][best_match]
        [[test_zip_]]=test_zip_entry
        print("Error address : " +str(test_address_entry) +"   "+str(test_zip_))
        print("Match: "+str(best_street)+"  "+str(best_zip))

  • 相关阅读:
    springboot之session、cookie
    Springboot的异步线程池
    spring自带的定时任务功能@EnableScheduling
    SpringBoot+SpringCloud实现登录用户信息在微服务之间的传递
    sss
    sss
    sss
    sss
    sss
    sss
  • 原文地址:https://www.cnblogs.com/x0216u/p/9237761.html
Copyright © 2011-2022 走看看