zoukankan      html  css  js  c++  java
  • 实现Apriori算法(python)

      1 # coding: utf-8
      2 
      3 # 利用python实现apriori算法
      4 
      5 # In[1]:
      6 
      7 
      8 #导入需要的库
      9 from numpy import *
     10 
     11 
     12 # In[2]:
     13 
     14 
     15 def loadDataSet():
     16     return [[1,3,4],[2,3,5],[1,2,3,5],[2,5]]
     17 
     18 
     19 # In[3]:
     20 
     21 
     22 def createC1(dataSet):
     23     C1=[]
     24     for transaction in dataSet:
     25         for item in transaction:
     26             if not [item] in C1:
     27                 C1.append([item])
     28     C1.sort()
     29     return map(frozenset,C1)
     30 
     31 
     32 # In[4]:
     33 
     34 
     35 #计算Ck在数据集D中的支持度,并返回支持度大于minSupport的数据集
     36 def scanD(D,Ck,minSupport):
     37     ssCnt={}
     38     for tid in D:
     39         for can in Ck:
     40             if can.issubset(tid):
     41                 if can not in ssCnt.keys():
     42                     ssCnt[can]=1
     43                 else :
     44                     ssCnt[can]+=1
     45     numItems=float(len(D))
     46     retList=[]
     47     supportData={}
     48     for key in ssCnt:
     49         support=ssCnt[key]/numItems
     50         if support>= minSupport:
     51             retList.insert(0,key)
     52         supportData[key]=support
     53     return retList,supportData
     54 
     55 
     56 # In[15]:
     57 
     58 
     59 def aprioriGen(Lk,k):
     60     retList=[]
     61     lenLk=len(Lk)
     62     for i in range(lenLk):
     63         for j in range(i+1,lenLk):
     64             L1=list(Lk[i])[:k-2]
     65             L2=list(Lk[j])[:k-2]
     66             L1.sort()
     67             L2.sort()
     68             if L1==L2:
     69                 retList.append(Lk[i] | Lk[j])
     70     return retList
     71         
     72 
     73 
     74 # In[14]:
     75 
     76 
     77 def apriori(dataSet, minSupport=0.5):
     78     C1=createC1(dataSet)
     79     D=list(map(set,dataSet))
     80     print('D:',D)
     81     L1,supportData= scanD(D,C1,minSupport)
     82     L=[L1]
     83     k=2
     84     while (len(L[k-2])>0):
     85         Ck=aprioriGen(L[k-2], k)
     86         Lk,supK= scanD(D,Ck,minSupport)
     87         supportData.update(supK)
     88         if len(Lk)==0:
     89             break
     90         L.append(Lk)
     91         k+=1
     92     return L,supportData
     93 
     94 
     95 # In[19]:
     96 
     97 
     98 def calConf(freqSet,H,supportData,brl,minConf=0.7):
     99     prunedH=[]
    100     for conseq in H:
    101         conf=supportData[freqSet]/supportData[freqSet-conseq]
    102         if conf >= minConf:
    103             print(freqSet-conseq, '-->',conseq,'conf',conf)
    104             brl.append((freqSet-conseq,conseq,conf))
    105             prunedH.append(conseq)
    106     return prunedH
    107 
    108 
    109 # In[21]:
    110 
    111 
    112 def rulesFromConseq(freqSet,H,supportData,brl,minConf=0.7):
    113     m=len(H[0])
    114     if(len(freqSet)>(m+1)):
    115         Hmpl=aprioriGen(H,m+1)
    116         Hmpl=calConf(freqSet,Hmpl,supportData,brl,minConf)
    117         print('Hmpl=',Hmpl)
    118         print('len(Hmpl)=',len(Hmpl),'len(freqSet)=',len(freqSet))
    119         if(len(Hmpl)>1):
    120             rulesFromConseq(freqSet,Hmpl,supportData,brl,minConf)
    121 
    122 
    123 # In[9]:
    124 
    125 
    126 def generateRules(L,supportData,minConf=0.7):
    127     bigRuleList=[]
    128     for i in range(1,len(L)):
    129         for freqSet in L[i]:
    130             H1=[frozenset([item]) for item in freqSet]
    131             if(i>1):
    132                 rulesFromConseq(freqSet,H1,supportData,bigRuleList,minConf)
    133             else:
    134                 calConf(freqSet,H1,supportData,bigRuleList,minConf)
    135     return bigRuleList
    136 
    137 
    138 # In[10]:
    139 
    140 
    141 def testApriori():
    142     dataSet=loadDataSet()
    143     print('dataSet:',dataSet)
    144     L1,supportData1=apriori(dataSet,minSupport=0.7)
    145     print('L(0.7):',L1)
    146     print('supportData(0.7):',supportData1)
    147     print('------------------------------------------')
    148     L2,supportData2=apriori(dataSet,minSupport=0.5)
    149     print('L(0.5):',L2)
    150     print('supportData(0.5:).supportData2')
    151     print('------------------------------------------')
    152 
    153 
    154 # In[11]:
    155 
    156 
    157 def testGenerateRules():
    158     dataSet=loadDataSet()
    159     L1,supportData1=apriori(dataSet,minSupport=0.2)
    160     print('L(0.2):',L1)
    161     print('minSupport(0.2):',supportData1)
    162     rules=generateRules(L1,supportData1,minConf=1.1)
    163     print('Rules:',rules)
    164 
    165 
    166 # In[12]:
    167 
    168 
    169 def main():
    170     testApriori()
    171     testGenerateRules()
    172 
    173 
    174 # In[22]:
    175 
    176 
    177 if __name__=="__main__":
    178     main()

    参考:“机器学习实战-ApacheCN”

  • 相关阅读:
    04-Go语言之运算符
    02-Go语言之变量和常量
    idea 无法加载识别本地类
    阿里云OSS实践篇
    jemeter 压测入门篇(附带工具)
    SpringBoot 中的那些“开关”
    java8 新特性之4大函数式接口
    java8 新特性之optional
    VSCode vue开发前配置
    前端架构演进及主流UI
  • 原文地址:https://www.cnblogs.com/share-sjb/p/9977803.html
Copyright © 2011-2022 走看看