Python 信息熵-条件熵计算
import csv

import numpy as np


def read(path: str) -> tuple:
    """Load a CSV file into a list of rows.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A tuple ``(A, N, M)`` where ``A`` is the list of rows (each row a
        list of strings), ``N`` is the number of rows and ``M`` is the
        number of columns of the first row.  An empty file yields
        ``([], 0, 0)``.
    """
    # newline='' is what the csv module expects so that it can handle
    # line endings embedded in the data itself.
    with open(path, 'r', newline='') as f:
        # csv.reader yields [] for blank lines; they carry no sample.
        A = [row for row in csv.reader(f) if row]
    if not A:
        return A, 0, 0
    return A, len(A), len(A[0])


def h(p: float) -> float:
    """Return the entropy term ``-p * log2(p)`` for a probability ``p``.

    The term is defined as 0 for ``p == 0`` (the limit of ``-p * log2(p)``
    as ``p`` tends to 0); evaluating it directly would produce NaN.
    """
    if p <= 0:
        return 0.0
    return -p * np.log2(p)


def Hc(A: list, N: int, M: int) -> float:
    """Return the binary entropy of the label column of ``A``.

    Args:
        A: Rows whose column ``M - 1`` holds the label, convertible with
            ``int``.  The label is summed, so it is treated as 0/1.
        N: Number of samples used as the denominator of the probability.
        M: Number of columns; the label is read from column ``M - 1``.

    Returns:
        ``h(p) + h(1 - p)`` with ``p = sum(labels) / N``, or 0.0 when
        ``N`` is 0 (an empty set carries no uncertainty).
    """
    if N == 0:
        return 0.0
    positives = sum(int(a[M - 1]) for a in A)
    p = positives / N
    return h(p) + h(1 - p)


def Hcw(A: list, w: int, N: int, M: int) -> float:
    """Return the conditional entropy of the label given feature ``w``.

    Rows are split by whether column ``w`` equals 0; the entropy of each
    part is weighted by the fraction of the ``N`` samples it contains.

    Args:
        A: Rows of values convertible with ``int``.
        w: Index of the feature column to condition on.
        N: Total number of samples in ``A``.
        M: Number of columns; the label is read from column ``M - 1``.

    Returns:
        ``sum(len(S) / N * Hc(S))`` over the two parts ``S``, or 0.0 when
        ``N`` is 0.
    """
    if N == 0:
        return 0.0
    subset0 = []
    subset1 = []
    for a in A:
        if int(a[w]) == 0:
            subset0.append(a)
        else:
            subset1.append(a)
    total = 0.0
    for subset in (subset0, subset1):
        # The entropy inside a part must be normalised by that part's own
        # size, not by the size of the whole data set.
        total += (len(subset) / N) * Hc(subset, len(subset), M)
    return total


if __name__ == '__main__':
    path = 'binary_data.csv'
    A, N, M = read(path)
    # The label entropy does not depend on the feature; compute it once.
    base_entropy = Hc(A, N, M)
    # Report the first five features, clamped to the number of feature
    # columns actually present (the last column is the label).
    for i in range(min(5, M - 1)):
        IG = base_entropy - Hcw(A, i, N, M)
        print("IG(w = %d) = %.5f" % (i, IG))