zoukankan      html  css  js  c++  java
  • 机器学习之-朴素贝叶斯简单实例

    import numpy as np
    import math
    
    #加载模拟数据
    def loaddata():
        """Return the hard-coded toy corpus together with its class labels.

        Returns:
            A tuple ``(postingList, classVec)``. ``postingList`` is a list of
            six tokenised posts (each a list of words) and ``classVec`` is the
            parallel list of labels: 1 = insulting, 0 = not insulting.
        """
        # Each sample keeps its label next to its tokens so the two cannot
        # drift out of step; they are split into parallel lists below.
        samples = (
            (0, ['my', 'dog', 'has', 'flea', 'problem', 'help', 'please']),
            (1, ['maybe', 'not', 'take', 'him', 'to', 'dog', 'park', 'stupid']),
            (0, ['my', 'dalmation', 'is', 'so', 'cute', 'I', 'love', 'him']),
            (1, ['stop', 'posting', 'stupid', 'worthless', 'garbage']),
            (0, ['mr', 'licks', 'ate', 'my', 'steak', 'how', 'to', 'stop', 'him']),
            (1, ['quit', 'buying', 'worthless', 'dog', 'food', 'stupid']),
        )
        postingList = [tokens for _, tokens in samples]
        classVec = [label for label, _ in samples]
        return postingList, classVec
    
    #创建词汇表
    def createSet(dataset):
        """Collect every distinct word that appears in ``dataset``.

        Args:
            dataset: An iterable of documents, each an iterable of hashable
                words.

        Returns:
            A list holding each distinct word exactly once. The list is built
            from a set, so the order of its elements is not defined.
        """
        vocabulary = set()
        for document in dataset:
            vocabulary.update(document)
        return list(vocabulary)
    
    # dataSet,labels = loaddata()
    # vacablist = createSet(dataSet)
    # print('外lables',labels)
    # print('外dataSet',dataSet)
    # print('外vacablist:',vacablist)
    
    #创建和词汇表对应的向量
    def setofword(vacablist, inputdata):
        """Encode a document as a 0/1 presence vector over the vocabulary.

        Args:
            vacablist: The vocabulary, a sequence of hashable words. Position
                ``k`` of the result corresponds to ``vacablist[k]``.
            inputdata: An iterable of the words of the document to encode.

        Returns:
            A list of ``len(vacablist)`` ints: 1 where the vocabulary word
            occurs in ``inputdata``, 0 elsewhere. A word that is not in the
            vocabulary is reported on stdout and otherwise ignored.
        """
        # Build the word -> position lookup once instead of scanning the
        # vocabulary list for every input word. setdefault keeps the FIRST
        # position of a repeated word, which is what list.index() returns.
        position_of = {}
        for position, term in enumerate(vacablist):
            position_of.setdefault(term, position)

        mylist = [0] * len(vacablist)
        for word in inputdata:
            position = position_of.get(word)
            if position is None:
                print('没有 {} 这个词'.format(word))
            else:
                mylist[position] = 1
        return mylist
    # setofdata = setofword(vacablist,dataSet[3])
    # print('外setofdata:',setofdata)                        #[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0]
    # print(vacablist)                        #['garbage', 'to', 'worthless', 'ate', 'has', 'so', 'take', 'cute', 'dog', 'flea', 'buying', 'help', 'is', 'park', 'I', 'food', 'my', 'licks', 'posting', 'dalmation', 'problem', 'please', 'stop', 'how', 'stupid', 'maybe', 'love', 'steak', 'quit', 'him', 'not', 'mr']
    # print(dataSet[3])                       #['stop', 'posting', 'stupid', 'worthless', 'garbage']
    # trainmat = []
    # for i in dataSet:
    #     trainmat.append(setofword(vacablist,i))
    # print('外trainmat:',trainmat)
    
    # Training function: estimates P(word | class) for both classes and the prior P(class = 1)
    def P1(trainmat, labels):
        """Estimate per-class word probabilities and the class-1 prior.

        Args:
            trainmat: A sequence of equal-length 0/1 word vectors, one per
                training document.
            labels: The class label (0 or 1) of each row of ``trainmat``.
                Rows with any other label are skipped.

        Returns:
            A tuple ``(data_0, data_1, plable_1)``: two numpy arrays holding
            the per-word probability estimates for class 0 and class 1, and
            the fraction of labels that are 1.

        The three results are also printed to stdout.
        """
        prior_1 = sum(labels) / len(labels)

        vocab_size = len(trainmat[0])
        # Word counts start at one and the totals at two, so a word never
        # seen in a class still gets a non-zero probability.
        hits_0, total_0 = np.ones(vocab_size), 2
        hits_1, total_1 = np.ones(vocab_size), 2

        for row_idx, label in enumerate(labels):
            if label == 0:
                hits_0 += trainmat[row_idx]
                total_0 += sum(trainmat[row_idx])
            elif label == 1:
                hits_1 += trainmat[row_idx]
                total_1 += sum(trainmat[row_idx])

        prob_0 = hits_0 / total_0
        prob_1 = hits_1 / total_1
        print('data_0:{},count:{}'.format(prob_0, total_0))
        print('data_1:{},count:{}'.format(prob_1, total_1))
        print('plabel_1:', prior_1)
        return prob_0, prob_1, prior_1
    
    # P1(trainmat,labels)
    #用得到的概率分类
    def _log_probability(value):
        """Return ``log(value)``, or negative infinity when ``value`` is not positive."""
        return math.log(value) if value > 0 else float('-inf')


    def classfy(testset, data_0, data_1, plabel_1):
        """Classify a 0/1 word vector with the trained naive Bayes parameters.

        The class scores are accumulated as sums of logarithms rather than as
        a running product. A product of many small probabilities underflows to
        0.0, which makes both classes tie and the result always 0; the log sum
        does not have that problem.

        Args:
            testset: The 0/1 word vector of the document to classify.
            data_0: Per-word probabilities for class 0, indexed like
                ``testset``.
            data_1: Per-word probabilities for class 1, indexed like
                ``testset``.
            plabel_1: Prior probability of class 1; ``1 - plabel_1`` is used
                as the prior of class 0.

        Returns:
            1 if the class-1 score is strictly greater than the class-0 score,
            otherwise 0 (ties go to class 0).
        """
        print('开始classfy')
        log_p1 = _log_probability(plabel_1)
        log_p0 = _log_probability(1 - plabel_1)
        for position, flag in enumerate(testset):
            # Only the words that are present in the document contribute.
            if flag == 1:
                log_p1 += _log_probability(data_1[position])
                log_p0 += _log_probability(data_0[position])
        # The printed values are converted back to probabilities for display
        # only; they may show 0.0 for long documents while the decision below
        # still uses the log scores.
        print('p1:{},p0:{}'.format(math.exp(log_p1), math.exp(log_p0)))
        if log_p1 > log_p0:
            print('该分类为1')
            return 1
        else:
            print('该分类为0')
            return 0
    
    #测试总逻辑代码
    def test():
        """Run the demo end to end: train on the toy corpus, classify one post.

        Loads the sample posts, builds the vocabulary, converts every post to
        a word vector, trains with ``P1`` and finally classifies the words
        ``['my', 'love', 'stupid']`` with ``classfy``. Nothing is returned;
        the called functions print their own output.
        """
        posts, post_labels = loaddata()
        vocabulary = createSet(posts)
        # The training function expects each post as a vector over the vocabulary.
        vectors = [setofword(vocabulary, post) for post in posts]
        word_prob_0, word_prob_1, prior_1 = P1(vectors, post_labels)
        query_vector = setofword(vocabulary, ['my', 'love', 'stupid'])
        classfy(query_vector, word_prob_0, word_prob_1, prior_1)
    
    # Run the demo only when this file is executed as a script, so importing
    # the module does not trigger training and printing as a side effect.
    if __name__ == "__main__":
        test()
  • 相关阅读:
    MyBatis Plus + Activiti 整合报错:org.springframework.beans.factory.UnsatisfiedDependencyException
    SQLTransientConnectionException: HikariPool-1
    深入分析Spring Boot2,解决 java.lang.ArrayStoreException异常
    vim命令
    Spring Security + JWT实现前后端分离权限认证
    前后端分离之JWT用户认证(转)
    MySQL高可用性之Keepalived+MySQL(双主热备)
    Linux下开启mysql数据库的远程访问权限
    Mysql主要索引方式:FULLTEXT,HASH,BTREE,RTREE。
    按照时间段查询日志文件
  • 原文地址:https://www.cnblogs.com/cxhzy/p/10652201.html
Copyright © 2011-2022 走看看