zoukankan      html  css  js  c++  java
  • 利用 Python 实现《数据挖掘——概念与技术》一书中描述的 Apriori 算法

     1 from itertools import combinations
     2 
     3 data = [['I1', 'I2', 'I5'], ['I2', 'I4'], ['I2', 'I3'], ['I1', 'I2', 'I4'], ['I1', 'I3'],
     4         ['I2', 'I3'], ['I1', 'I3'], ['I1', 'I2', 'I3', 'I5'], ['I1', 'I2', 'I3']]
     5 
     6 
     7 # 候选集生成
     8 # 输入:
     9 # f_set: k-1项集, k:项集个数
    10 # 输出:
    11 # k_cand:k项候选集
    12 def apriori_gen(f_set, k):
    13     k_cand = []
    14     temp = [frozenset(l) for l in combinations(f_set, k)]
    15     for t in temp:
    16         if has_infrequent_subset(t, f_set):
    17             del t
    18         else:
    19             k_cand.append(t)
    20     return k_cand
    21 
    22 # 非频繁项集的超集也是非频繁的
    23 def has_infrequent_subset(c_set, f_set):
    24     for subset in c_set:
    25         if not frozenset([subset]).issubset(f_set):
    26             return True
    27     return False
    28 
    29 # 输入(绝对)最小支持度, min_sup
    30 # 输出:全部频繁项集(不包括一项集), all_f_set
    31 def get_f_set(min_sup=2):
    32     all_f_set = []
    33     L1 = frozenset([d for ds in data for d in ds])
    34     k = 2
    35     size = len(L1)
    36     while k <= size:
    37         c_k = frozenset(apriori_gen(L1, k))
    38         for c in c_k:
    39             count = 0
    40             for d in data:
    41                 if c.issubset(frozenset(d)):
    42                     count += 1
    43             if count >= min_sup:
    44                 all_f_set.append((c, count))
    45         k += 1
    46     return all_f_set
    47 
    48 if __name__ == '__main__':
    49     all_frequent_set = get_f_set()
    50     for i in all_frequent_set:
    51         print(i)

  • 相关阅读:
    go并发和并行
    goroutine
    go并发
    wampserver配置问题
    获取字符串的长度
    mysql中事件失效如何解决
    Go语言中Goroutine与线程的区别
    Mosquitto服务器的日志分析
    phpexcel导出数据 出现Formula Error的解决方案
    Centos6.X 手动升级gcc
  • 原文地址:https://www.cnblogs.com/laresh/p/7788713.html
Copyright © 2011-2022 走看看