zoukankan      html  css  js  c++  java
  • python实现简单关联规则Apriori算法

     1 from itertools import combinations
     2 from copy import deepcopy
     3 
     4 
     5 # 导入数据,并剔除支持度计数小于min_support的1项集
     def load_data(data, threshold=None):
         """Count every item in ``data`` and keep only the frequent ones.

         Args:
             data: Iterable of transactions; each transaction is an iterable
                 of hashable items.
             threshold: Minimum support count an item needs in order to be
                 kept. When omitted, the module-level ``min_support`` value
                 is used, which is how the original script configures it.

         Returns:
             A dict mapping each item whose occurrence count is at least
             ``threshold`` to that count, in first-seen order.
         """
         if threshold is None:
             # Backward compatibility: fall back to the global the script
             # defines before calling this function.
             threshold = min_support

         counts = {}
         for transaction in data:
             for item in transaction:
                 counts[item] = counts.get(item, 0) + 1

         # Filter in a single pass instead of deep-copying the dict and then
         # deleting the infrequent keys from the copy.
         return {item: n for item, n in counts.items() if n >= threshold}
    16 
    17 
    18 # Count the support of each candidate itemset and keep those whose count is >= min_support
    def get_support_set(p_set, transactions=None, threshold=None):
        """Return the candidate itemsets that meet the minimum support count.

        Args:
            p_set: Iterable of candidate itemsets; each must provide
                ``issubset`` (e.g. ``set`` or ``frozenset``).
            transactions: Re-iterable collection of transactions (it is
                scanned once per candidate). When omitted, the module-level
                ``data_set`` is used, as in the original script.
            threshold: Minimum number of transactions that must contain a
                candidate. When omitted, the module-level ``min_support``
                is used.

        Returns:
            A list of ``[itemset, count]`` pairs, in the order the candidates
            were given, for every candidate with ``count >= threshold``.
        """
        # Backward compatibility: fall back to the globals the script sets up.
        if transactions is None:
            transactions = data_set
        if threshold is None:
            threshold = min_support

        supported = []
        for candidate in p_set:
            count = sum(1 for transaction in transactions
                        if candidate.issubset(transaction))
            if count >= threshold:
                supported.append([candidate, count])
        return supported
    29 
    30 
    31 # 找出所有频繁项集
    32 # 以二项集为初始集
    def get_all_items(two_set, k=3):
        """Grow frequent itemsets level by level, starting from ``two_set``.

        Args:
            two_set: List of ``[itemset, count]`` pairs for the previous
                level (the frequent 2-itemsets on the first pass).
            k: Size of the itemsets to generate on the first pass.

        Returns:
            A list of ``[itemset, count]`` pairs, as returned by
            ``get_support_set``, for every level from ``k`` upwards.
        """
        collected = []
        current_level = two_set
        while True:
            previous_itemsets = [entry[0] for entry in current_level]
            # Distinct items of the previous level, in first-seen order.
            items = list(dict.fromkeys(
                item for itemset in previous_itemsets for item in itemset))

            candidates = []
            for combo in combinations(items, k):
                candidate = set(combo)
                covered = sum(1 for itemset in previous_itemsets
                              if candidate.issuperset(itemset))
                # Keep the candidate only when exactly k previous-level
                # itemsets are contained in it.
                if covered == k:
                    candidates.append(candidate)

            survivors = get_support_set(candidates)
            if not candidates:
                break
            k += 1
            current_level = survivors
            collected.extend(survivors)
        return collected
    60 
    61 
    if __name__ == '__main__':
        # Demo transaction database: one list of item labels per transaction.
        data = [['I1', 'I2', 'I5'],
                ['I2', 'I4'],
                ['I2', 'I3'],
                ['I1', 'I2', 'I4'],
                ['I1', 'I3'],
                ['I2', 'I3'],
                ['I1', 'I3'],
                ['I1', 'I2', 'I3', 'I5'],
                ['I1', 'I2', 'I3']]
        # Module-level names read by load_data and get_support_set; they must
        # be defined before those functions are called.
        data_set = [set(d) for d in data]
        min_support = 1

        # Scan the data once and reuse the result for both the 1-itemsets and
        # the 2-itemset candidates (the original called load_data twice).
        frequent_items = load_data(data)
        one = [[{item}, count] for item, count in frequent_items.items()]
        two = [set(pair) for pair in combinations(frequent_items, 2)]
        two_f_set = get_support_set(two)

        all_frequent_set = one + two_f_set + get_all_items(two_f_set)
        for afs in all_frequent_set:
            print(afs)

    输出结果:

  • 相关阅读:
    重构第30天 尽快返回 (Return ASAP)
    Thingsboard MQTT连接至服务器
    Thingsboard学习之三启动Thingsboard
    Thingsboard学习之二安装Docker和Docker-Compose
    Thingsboard学习之一CentOS安装系统更新
    CentOS 修改固定IP地址
    macbook配置homebrew
    macbook配置flutter环境变量
    Linux单独打包工具-Ubuntu
    postman测试API
  • 原文地址:https://www.cnblogs.com/laresh/p/7665777.html
Copyright © 2011-2022 走看看