def GIS():
    """Fit the per-(word, class) feature weights by iterative scaling.

    Operates entirely on module-level state:

    * ``WordDic``       -- its keys are the word ids that get weights.
    * ``ClassList``     -- sequence of class ids.
    * ``DocList``       -- each item ``doc`` has ``doc[0]`` (mapping of
      word id -> feature value) and ``doc[1]`` (the document's class id).
    * ``FeaClassTable`` -- ``FeaClassTable[wid][0]`` holds the empirical
      value per class (read only here); ``FeaClassTable[wid][1]`` holds the
      model expectation per class and is recomputed on every iteration.
    * ``FeaWeights``    -- reset to 0.0 for every (word, class) pair and then
      updated in place.
    * ``C``             -- divisor applied to every log-ratio update
      (presumably the GIS correction constant; confirm where it is set).
    * ``LogLLDiff`` / ``MaxIteration`` -- stopping criteria: iteration ends
      when the log-likelihood improves by less than ``LogLLDiff`` or after
      ``MaxIteration`` passes.

    Class probabilities are computed with the log-sum-exp trick so that large
    weight sums do not overflow ``math.exp`` and a vanishing probability for
    the true class does not reach ``math.log(0)``.

    Returns:
        None. Progress is printed once per iteration.
    """
    # Start from all-zero weights (entries are replaced in place so that any
    # other holder of the FeaWeights dict sees the update).
    for wid in WordDic:
        FeaWeights[wid] = dict.fromkeys(ClassList, 0.0)

    iteration = 0
    # Sentinel values chosen so the first convergence test always passes.
    prev_loglik = -1000000.0
    loglik = -10000.0
    while loglik - prev_loglik >= LogLLDiff and iteration < MaxIteration:
        iteration += 1
        prev_loglik = loglik
        loglik = 0.0
        print("Iteration %d" % iteration)

        # Clear the model expectations accumulated by the previous pass.
        for wid in WordDic:
            expected = FeaClassTable[wid][1]
            for classid in ClassList:
                expected[classid] = 0.0

        # Compute expected values of features subject to the model p(y|x).
        for doc in DocList:
            features = doc[0]
            label = doc[1]

            # Unnormalised log-score of each class: sum of the weights of
            # the words present in the document.
            scores = [
                sum((FeaWeights[wid][classid] for wid in features), 0.0)
                for classid in ClassList
            ]
            if not scores:
                # No classes: nothing to accumulate for this document.
                continue

            # Log-sum-exp: shifting by the maximum keeps every exponent <= 0,
            # so exp() cannot overflow and the normaliser is at least 1.0.
            top = max(scores)
            shifted = [math.exp(score - top) for score in scores]
            normaliser = sum(shifted)
            log_normaliser = top + math.log(normaliser)

            for classid, score, numer in zip(ClassList, scores, shifted):
                prob = numer / normaliser
                if classid == label:
                    # log p(label | doc), computed without taking log(prob)
                    # so an underflowed probability cannot raise.
                    loglik += score - log_normaliser
                for wid, value in features.items():
                    FeaClassTable[wid][1][classid] += prob * value

        # Update feature weights.
        for wid in WordDic:
            empirical = FeaClassTable[wid][0]
            expected = FeaClassTable[wid][1]
            weights = FeaWeights[wid]
            for classid in ClassList:
                emp_value = empirical.get(classid, 0.0)
                model_value = expected.get(classid, 0.0)
                # A zero on either side would make the log-ratio undefined;
                # such pairs keep their current weight.
                if emp_value == 0.0 or model_value == 0.0:
                    continue
                # float() guards against integer division of counts on
                # Python 2.
                weights[classid] += math.log(float(emp_value) / model_value) / C

        # NOTE(review): reported once per iteration; placement reconstructed
        # from a whitespace-mangled source -- confirm against the original.
        print("Loglikelihood: %s" % loglik)
    return