zoukankan      html  css  js  c++  java
  • 机器学习11—Apriori学习笔记

     votesmart下载  https://pypi.python.org/pypi/py-votesmart

    test11.py

    #-*- coding:utf-8
    import sys
    # NOTE(review): sys.path entries are normally directories (or zip archives);
    # appending the file name "apriori.py" likely has no effect on the import
    # below -- confirm and remove if so.
    sys.path.append("apriori.py")
    
    import apriori
    from numpy import *
    
    # dataSet = apriori.loadDataSet()
    # print("dataSet:")
    # print(dataSet)
    #
    # C1 = apriori.createC1(dataSet)
    # print("C1:")
    # print(C1)
    #
    # D = list(map(set, dataSet))
    # print("D:")
    # print(D)
    #
    # L1, suppData0 = apriori.scanD(D, C1, 0.5)
    # print("L1:")
    # print(L1)
    # print("suppData0:")
    # print(suppData0)
    #
    #
    # L, suppData = apriori.apriori(dataSet)
    # print("L:")
    # print(L)
    #
    # L, suppData = apriori.apriori(dataSet, minSupport = 0.5)
    # rules = apriori.generateRules(L, suppData, minConf = 0.5)
    # print("L:")
    # print(L)
    # print("rules:")
    # print(rules)
    
    
    
    # Load the mushroom data set: each line is one transaction made of
    # whitespace-separated feature labels (kept as strings).
    # The context manager guarantees the file handle is closed after reading.
    with open('mushroom.dat') as mushFile:
        mushDatSet = [line.split() for line in mushFile]

    L, suppData = apriori.apriori(mushDatSet, minSupport = 0.3)
    print("L[1]:")
    print(L[1])
    # Show only the frequent 2-itemsets that contain the feature labelled '2'.
    # A membership test is used instead of intersection('2'), which iterates the
    # string character by character and only works for one-character labels.
    for item in L[1]:
        if '2' in item:
            print(item)

    print("over!!!")


    apriori.py
    '''
    Created on Mar 24, 2011
    Ch 11 code
    @author: Peter
    '''
    from numpy import *
    
    def loadDataSet():
        """Return a small hard-coded sample: four transactions of integer items."""
        transactions = [
            [1, 3, 4],
            [2, 3, 5],
            [1, 2, 3, 5],
            [2, 5],
        ]
        return transactions
    
    def createC1(dataSet):
        """Build the candidate 1-itemsets (C1) from a collection of transactions.

        Args:
            dataSet: iterable of transactions, each an iterable of hashable,
                mutually orderable items.

        Returns:
            A list of single-element frozensets, one per distinct item, in
            ascending item order. frozenset is used so each candidate can
            serve as a dictionary key.
        """
        # A set gives O(1) duplicate detection; the original scanned a list
        # for every item, which is quadratic in the number of distinct items.
        uniqueItems = set()
        for transaction in dataSet:
            uniqueItems.update(transaction)
        return [frozenset([item]) for item in sorted(uniqueItems)]
    
    def scanD(D, Ck, minSupport):
        """Count candidate occurrences and keep those meeting the support threshold.

        Args:
            D: sequence of transactions (sets).
            Ck: iterable of candidate itemsets (frozensets).
            minSupport: minimum fraction of transactions a candidate must
                appear in to be kept.

        Returns:
            (retList, supportData): retList holds the candidates whose support
            is >= minSupport, most recently counted first; supportData maps
            every candidate seen in at least one transaction to its support.
        """
        hits = {}
        for transaction in D:
            for candidate in Ck:
                if candidate.issubset(transaction):
                    hits[candidate] = hits.get(candidate, 0) + 1

        transactionCount = float(len(D))
        supportData = {}
        frequent = []
        for candidate, count in hits.items():
            ratio = count / transactionCount
            supportData[candidate] = ratio
            if ratio >= minSupport:
                frequent.append(candidate)
        # Same ordering as repeatedly inserting at the front of the list.
        frequent.reverse()
        return frequent, supportData
    
    def aprioriGen(Lk, k): #creates Ck
        """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

        Two itemsets are merged when their first k-2 items, taken in sorted
        order, are identical.

        Args:
            Lk: list of frozensets, each expected to hold k-1 orderable items.
            k: size of the candidate itemsets to produce.

        Returns:
            List of frozensets, each the union of one joinable pair from Lk.
        """
        # Sort BEFORE slicing. The original sliced the raw set iteration order
        # and sorted afterwards, so the "prefix" depended on hash ordering and
        # could miss valid candidates or emit duplicates.
        # Computing the prefixes once also avoids redoing the work per pair.
        prefixes = [sorted(itemset)[:k-2] for itemset in Lk]
        retList = []
        lenLk = len(Lk)
        for i in range(lenLk):
            for j in range(i+1, lenLk):
                if prefixes[i] == prefixes[j]: #first k-2 elements are equal
                    retList.append(Lk[i] | Lk[j]) #set union
        return retList
    
    def apriori(dataSet, minSupport = 0.5):
        """Find the frequent itemsets of dataSet, level by level.

        Args:
            dataSet: sequence of transactions (each an iterable of items).
            minSupport: minimum support passed to scanD at every level.

        Returns:
            (L, supportData): L[i] is the list of frequent itemsets found at
            level i (sizes start at 1); the last entry of L is an empty list,
            which is what ends the search. supportData merges the support
            dictionaries returned by scanD for all levels.
        """
        candidates = createC1(dataSet)
        transactions = list(map(set, dataSet))
        frequent, supportData = scanD(transactions, candidates, minSupport)
        L = [frequent]
        k = 2
        # Keep growing itemsets until a level produces nothing frequent.
        while L[-1]:
            candidates = aprioriGen(L[-1], k)
            frequent, levelSupport = scanD(transactions, candidates, minSupport)
            supportData.update(levelSupport)
            L.append(frequent)
            k += 1
        return L, supportData
    
    def generateRules(L, supportData, minConf=0.7):  #supportData is a dict coming from scanD
        """Generate association rules from frequent itemsets.

        Args:
            L: list of lists of frequent itemsets as returned by apriori();
                L[0] (single-item sets) is skipped because a rule needs at
                least two items.
            supportData: dict mapping frozenset -> support, as built by scanD.
            minConf: minimum confidence a rule must reach to be kept.

        Returns:
            List of (antecedent, consequent, confidence) tuples.
        """
        bigRuleList = []
        for i in range(1, len(L)):#only get the sets with two or more items
            for freqSet in L[i]:
                H1 = [frozenset([item]) for item in freqSet]
                # Always score the single-item consequents first. Previously,
                # itemsets with three or more items went straight to
                # rulesFromConseq, which starts at 2-item consequents, so rules
                # such as {a, b} --> {c} were never produced.
                H1 = calcConf(freqSet, H1, supportData, bigRuleList, minConf)
                # Only consequents that passed can be merged into larger ones;
                # at least two are needed to merge.
                if i > 1 and len(H1) > 1:
                    rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)
        return bigRuleList
    
    def calcConf(freqSet, H, supportData, brl, minConf=0.7):
        """Score candidate consequents of freqSet and record the rules that pass.

        For each consequent in H the confidence is
        support(freqSet) / support(freqSet - consequent).

        Args:
            freqSet: the frequent itemset (frozenset) the rules are drawn from.
            H: iterable of candidate consequents (frozensets).
            supportData: dict mapping frozenset -> support.
            brl: list that passing rules are appended to, as
                (antecedent, consequent, confidence) tuples.
            minConf: minimum confidence for a rule to be kept.

        Returns:
            List of the consequents whose rule met minConf.
        """
        survivors = []
        for consequent in H:
            confidence = supportData[freqSet] / supportData[freqSet - consequent]
            if confidence >= minConf:
                antecedent = freqSet - consequent
                print(antecedent,'-->',consequent,'conf:',confidence)
                brl.append((antecedent, consequent, confidence))
                survivors.append(consequent)
        return survivors
    
    def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):
        """Recursively build rules with progressively larger consequents.

        Args:
            freqSet: the frequent itemset (frozenset) the rules are drawn from.
            H: non-empty list of consequents, all assumed to be the same size
                (the size is read from H[0]).
            supportData: dict mapping frozenset -> support.
            brl: list that passing rules are appended to (via calcConf).
            minConf: minimum confidence for a rule to be kept.
        """
        consequentSize = len(H[0])
        # Stop when a larger consequent would leave no items for the antecedent.
        if len(freqSet) <= consequentSize + 1:
            return
        merged = aprioriGen(H, consequentSize + 1)  # candidates one item larger
        surviving = calcConf(freqSet, merged, supportData, brl, minConf)
        if len(surviving) > 1:  # need at least two sets to merge again
            rulesFromConseq(freqSet, surviving, supportData, brl, minConf)
    
    def pntRules(ruleList, itemMeaning):
        """Print each rule in readable form using item descriptions.

        Args:
            ruleList: iterable of rules indexable as
                (antecedent items, consequent items, confidence).
            itemMeaning: lookup (indexed by item) giving the text for each item.
        """
        def showItems(items):
            # One description per line for every item on this side of the rule.
            for entry in items:
                print(itemMeaning[entry])

        for rule in ruleList:
            showItems(rule[0])
            print("           -------->")
            showItems(rule[1])
            print("confidence: %f" % rule[2])
            print("----------------")  # separator between rules
    
    
    from time import sleep
    from votesmart import votesmart
    # NOTE(review): an API key is hard-coded here in source; consider replacing
    # it with your own key loaded from configuration rather than committing it.
    votesmart.apikey = 'a7fa40adec6f4a77178799fae4441030'
    #votesmart.apikey = 'get your api key first'
    def getActionIds():
        """Collect House passage/amendment vote action ids for the listed bills.

        Reads 'recent20bills.txt', where each line is a bill number and a bill
        title separated by a tab, and queries the votesmart API once per bill.

        Returns:
            (actionIdList, billTitleList): parallel lists holding, for every
            matching action, its integer action id and the title of its bill.
        """
        actionIdList = []; billTitleList = []
        # Read everything up front so the file is closed before the slow,
        # rate-limited API loop starts.
        with open('recent20bills.txt') as fr:
            lines = fr.readlines()
        for line in lines:
            if not line.strip():
                continue  # a blank line would make int() below raise
            billNum = int(line.split('\t')[0])
            try:
                billDetail = votesmart.votes.getBill(billNum) #api call
                for action in billDetail.actions:
                    # Parenthesised so the condition may span two lines; the
                    # original had a bare line break here (a syntax error).
                    if (action.level == 'House' and
                            (action.stage == 'Passage' or action.stage == 'Amendment Vote')):
                        actionId = int(action.actionId)
                        print('bill: %d has actionId: %d' % (billNum, actionId))
                        actionIdList.append(actionId)
                        billTitleList.append(line.strip().split('\t')[1])
            except Exception:
                # Best effort: report and continue, but unlike a bare except
                # do not swallow KeyboardInterrupt / SystemExit.
                print("problem getting bill %d" % billNum)
            sleep(1)                                      #delay to be polite
        return actionIdList, billTitleList
    
    def getTransList(actionIdList, billTitleList):
        """Build per-politician transactions from the votes on each action.

        Item numbering: 0 = Republican, 1 = Democratic; after that each action
        gets two consecutive items, 'Nay' then 'Yea', starting at 2.

        Args:
            actionIdList: action ids to fetch votes for through the votesmart API.
            billTitleList: bill titles used to label the vote items.

        Returns:
            (transDict, itemMeaning): transDict maps a candidate name to the
            list of integer items recorded for that candidate; itemMeaning is
            the list of text labels indexed by item number.
        """
        itemMeaning = ['Republican', 'Democratic']#list of what each item stands for
        for billTitle in billTitleList:#fill up itemMeaning list
            itemMeaning.append('%s -- Nay' % billTitle)
            itemMeaning.append('%s -- Yea' % billTitle)
        transDict = {}#items in each transaction, keyed by politician name
        voteCount = 2
        for actionId in actionIdList:
            sleep(3)
            print('getting votes for actionId: %d' % actionId)
            try:
                voteList = votesmart.votes.getBillActionVotes(actionId)
                for vote in voteList:
                    # dict.has_key() no longer exists in Python 3; it raised
                    # AttributeError, which the bare except then hid, so no
                    # votes were ever recorded.
                    if vote.candidateName not in transDict:
                        transDict[vote.candidateName] = []
                        if vote.officeParties == 'Democratic':
                            transDict[vote.candidateName].append(1)
                        elif vote.officeParties == 'Republican':
                            transDict[vote.candidateName].append(0)
                    if vote.action == 'Nay':
                        transDict[vote.candidateName].append(voteCount)
                    elif vote.action == 'Yea':
                        transDict[vote.candidateName].append(voteCount + 1)
            except Exception:
                # Best effort: report and move on, without swallowing
                # KeyboardInterrupt / SystemExit as a bare except would.
                print("problem getting actionId: %d" % actionId)
            voteCount += 2
        return transDict, itemMeaning


  • 相关阅读:
    20145220&20145209&20145309信息安全系统设计基础实验报告
    20145209 《信息安全系统设计基础》第8周学习总结
    R574
    gym102219
    102222F
    luogu 1337
    luogu 2503 & bzoj 2428
    18 BJ J
    poj 1981
    101992 I
  • 原文地址:https://www.cnblogs.com/Vae1990Silence/p/8620320.html
Copyright © 2011-2022 走看看