zoukankan      html  css  js  c++  java
  • 利用 Python 实现《数据挖掘——概念与技术》一书中描述的 Apriori 算法

     1 from itertools import combinations
     2 
     3 data = [['I1', 'I2', 'I5'], ['I2', 'I4'], ['I2', 'I3'], ['I1', 'I2', 'I4'], ['I1', 'I3'],
     4         ['I2', 'I3'], ['I1', 'I3'], ['I1', 'I2', 'I3', 'I5'], ['I1', 'I2', 'I3']]
     5 
     6 
     7 # 候选集生成
     8 # 输入:
     9 # f_set: k-1项集, k:项集个数
    10 # 输出:
    11 # k_cand:k项候选集
    12 def apriori_gen(f_set, k):
    13     k_cand = []
    14     temp = [frozenset(l) for l in combinations(f_set, k)]
    15     for t in temp:
    16         if has_infrequent_subset(t, f_set):
    17             del t
    18         else:
    19             k_cand.append(t)
    20     return k_cand
    21 
    22 # 非频繁项集的超集也是非频繁的
    23 def has_infrequent_subset(c_set, f_set):
    24     for subset in c_set:
    25         if not frozenset([subset]).issubset(f_set):
    26             return True
    27     return False
    28 
    29 # 输入(绝对)最小支持度, min_sup
    30 # 输出:全部频繁项集(不包括一项集), all_f_set
    31 def get_f_set(min_sup=2):
    32     all_f_set = []
    33     L1 = frozenset([d for ds in data for d in ds])
    34     k = 2
    35     size = len(L1)
    36     while k <= size:
    37         c_k = frozenset(apriori_gen(L1, k))
    38         for c in c_k:
    39             count = 0
    40             for d in data:
    41                 if c.issubset(frozenset(d)):
    42                     count += 1
    43             if count >= min_sup:
    44                 all_f_set.append((c, count))
    45         k += 1
    46     return all_f_set
    47 
    48 if __name__ == '__main__':
    49     all_frequent_set = get_f_set()
    50     for i in all_frequent_set:
    51         print(i)

  • 相关阅读:
    HTML图片轮播
    HTML横向二级导航
    HTML 5的革新——语义化标签
    Git撤销修改
    git add -A 和 git add . 的区别
    git reset HEAD 与 git reset --hard HEAD的区别
    Git 工作区、暂存区和版本库
    Python笔记 #21# DHNN
    算法导论(第三版)练习 6.1-1 ~ 6.1-7
    算法导论(第三版)练习 4.1-1 ~ 4.1-5
  • 原文地址:https://www.cnblogs.com/laresh/p/7788713.html
Copyright © 2011-2022 走看看