zoukankan      html  css  js  c++  java
  • 数据导入+欧式距离计算+互信息计算

    数据导入+欧式距离计算+互信息计算

    # coding=utf-8
    import numpy as np
    import math
    #import pandas as pd
    #from sklearn import preprocessing 
    
    # Load the CSV as a float array; skip_header=True skips one leading line.
    nd = np.genfromtxt('11111111.csv', delimiter=',', skip_header=True)

    # Plain nested-list copy of the loaded data.
    final_list = nd.tolist()

    # Module-level state used by the __main__ section further down.
    num = 32  # number of leading rows that are compared pairwise
    k = 0     # running count of processed pairs
    c = []    # collected pairwise scores

    # Every row as a tuple, then every tuple as its own ndarray.
    a = [tuple(row) for row in final_list]
    b = [np.array(row_tuple) for row_tuple in a]
    def distEclud(vecA, vecB):
        """Return the Euclidean distance between two vectors.

        Args:
            vecA: Array-like of numbers (ndarray, list or tuple).
            vecB: Array-like of numbers with a shape compatible with ``vecA``.

        Returns:
            The square root of the summed squared differences. For 1-D input
            this is a scalar; for 2-D input the sum runs over the first axis,
            giving one distance per column.
        """
        # asarray lets plain lists/tuples be subtracted element-wise.
        diff = np.asarray(vecA) - np.asarray(vecB)
        # np.sum over axis 0 replaces the builtin sum(), which walks the
        # array in a Python-level loop while producing the same reduction.
        return np.sqrt(np.sum(np.square(diff), axis=0))
    
    
    def NMI(A,B):
        """Return the normalized mutual information of two labelings.

        The score is ``2 * I(A; B) / (H(A) + H(B))`` with base-2 logarithms,
        where every distinct value in ``A`` (resp. ``B``) is one label.

        Args:
            A: 1-D array-like of labels.
            B: 1-D array-like of labels, same length as ``A``.

        Returns:
            A Python float. When both inputs carry a single label (so both
            entropies are zero) the labelings are treated as being in perfect
            agreement and 1.0 is returned instead of dividing by zero.

        Raises:
            ValueError: If the inputs are not 1-D, differ in length, or are
                empty.
        """
        # Coerce up front so plain lists compare element-wise.
        A = np.asarray(A)
        B = np.asarray(B)
        if A.ndim != 1 or B.ndim != 1:
            raise ValueError("NMI expects 1-D label sequences")
        if A.shape != B.shape:
            raise ValueError("A and B must have the same length")
        total = A.size
        if total == 0:
            raise ValueError("NMI is undefined for empty input")

        # Map each label to a dense index 0..n-1.
        _, a_idx = np.unique(A, return_inverse=True)
        _, b_idx = np.unique(B, return_inverse=True)
        n_a = int(a_idx.max()) + 1
        n_b = int(b_idx.max()) + 1

        # Contingency table: counts[i, j] = number of positions holding
        # label i in A and label j in B, built in a single pass.
        counts = np.bincount(a_idx * n_b + b_idx, minlength=n_a * n_b)
        counts = counts.reshape(n_a, n_b).astype(float)
        row_tot = counts.sum(axis=1)
        col_tot = counts.sum(axis=0)

        # Mutual information; empty cells contribute exactly zero, so only
        # the occupied cells are visited.
        rows, cols = np.nonzero(counts)
        occupied = counts[rows, cols]
        ratio = occupied * total / (row_tot[rows] * col_tot[cols])
        MI = np.sum((occupied / total) * np.log2(ratio))

        # Marginal entropies (every marginal count is at least 1).
        px = row_tot / total
        py = col_tot / total
        Hx = -np.sum(px * np.log2(px))
        Hy = -np.sum(py * np.log2(py))

        denom = Hx + Hy
        if denom == 0:
            # Both labelings consist of one cluster.
            return 1.0
        return float(2.0 * MI / denom)
    
    if __name__ == '__main__':
        # Score every unordered pair (i, j) with i < j among the first `num`
        # rows, in the order (0,1), (0,2), ..., (num-2, num-1).
        for i in range(num):
            for j in range(i + 1, num):
                k = k + 1
                c.append(NMI(b[i], b[j]))

        # Label each score with its 1-based position; the values are kept
        # as strings so the printed output format is unchanged.
        dic = {}
        for q, score in enumerate(c, start=1):
            dic['第{}个互信息'.format(q)] = '{}'.format(score)

        # Rank from highest to lowest score. The key converts the stored
        # string back to float: sorting the strings directly would order
        # them lexicographically (e.g. '1e-05' above '0.9'), and a lambda
        # avoids relying on the `operator` module, which this file never
        # imports.
        rankdata = sorted(dic.items(), key=lambda item: float(item[1]), reverse=True)
        print(rankdata)
            
            
  • 相关阅读:
    Linux 中改变主机名的 4 种方法
    如何成为优秀开发人员(一):怎样算是优秀的?
    Java中需要知道的关键字
    Java集合类常见的问题
    如何在 Linux 上复制文件/文件夹到远程系统?
    你还在 Select * 吗?
    技术人解决问题的思路
    如何创建编程语言,以及设计决策中的内容?
    Java内存溢出异常(下)
    如何在 Linux 中查看可用的网络接口
  • 原文地址:https://www.cnblogs.com/xingnie/p/10335013.html
Copyright © 2011-2022 走看看