  • Naive Bayes models in sklearn and their application

    from sklearn import datasets
    from sklearn.naive_bayes import GaussianNB

    # load the iris data and fit a Gaussian naive Bayes classifier
    iris = datasets.load_iris()
    gnb = GaussianNB()
    gnb.fit(iris.data, iris.target)
    y_pred = gnb.predict(iris.data)

    # number of samples and number of misclassified samples
    print(iris.data.shape[0], (iris.target != y_pred).sum())
    
    150 6
    
    
    
    iris.target
    
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    
    
    
    y_pred
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
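    Six of the 150 training samples are misclassified by GaussianNB. For a closer look at where those errors fall, here is a minimal sketch using sklearn.metrics (reusing iris and y_pred from the cell above; the choice of metrics is an addition, not part of the original post):

    from sklearn.metrics import accuracy_score, confusion_matrix

    # training-set accuracy: (150 - 6) / 150 = 0.96
    print(accuracy_score(iris.target, y_pred))

    # rows are true classes, columns are predicted classes;
    # off-diagonal entries show which species get confused
    print(confusion_matrix(iris.target, y_pred))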
    
    
    
    from sklearn import datasets
    from sklearn.naive_bayes import BernoulliNB

    # Bernoulli naive Bayes on the same iris data
    iris = datasets.load_iris()
    bnb = BernoulliNB()
    bnb.fit(iris.data, iris.target)
    y_pred = bnb.predict(iris.data)

    # number of samples and number of misclassified samples
    print(iris.data.shape[0], (iris.target != y_pred).sum())
    
    
    150 100
    
    
    iris.target
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    
    
    y_pred
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
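    BernoulliNB gets 100 of 150 samples wrong because it models binary features, while the iris measurements are continuous. With the default binarize=0.0 every (positive) measurement becomes 1, so all samples look identical and the model falls back to predicting a single class, as the all-zero y_pred above shows. A minimal sketch with a non-zero threshold (3.0 is an arbitrary value chosen for illustration, not from the original post):

    from sklearn.naive_bayes import BernoulliNB

    # binarize=3.0 turns each measurement into 0/1 depending on
    # whether it exceeds 3.0 before the Bernoulli model is fitted
    bnb_bin = BernoulliNB(binarize=3.0)
    y_pred_bin = bnb_bin.fit(iris.data, iris.target).predict(iris.data)
    print(iris.data.shape[0], (iris.target != y_pred_bin).sum())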
    
    
    
    from sklearn import datasets
    from sklearn.naive_bayes import MultinomialNB

    # multinomial naive Bayes on the same iris data
    iris = datasets.load_iris()
    mnb = MultinomialNB()
    mnb.fit(iris.data, iris.target)
    y_pred = mnb.predict(iris.data)

    # here the full boolean mask of misclassifications is printed;
    # adding .sum() would give the error count as in the earlier cells
    print(iris.data.shape[0], (iris.target != y_pred))
    
    150 [False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False  True False  True False
      True False False False False False False False False False False  True
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False  True False  True
     False  True False False False False False False False False False False
     False False False False False False]
    
    
    iris.target
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    
    y_pred
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1,
           2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
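    All three cells above predict on the same data the model was fitted on, which overstates real-world accuracy. A minimal held-out evaluation sketch (the 70/30 split and random_state are arbitrary choices, not from the original post):

    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import GaussianNB

    # hold out 30% of the samples as a test set
    X_train, X_test, y_train, y_test = train_test_split(
        iris.data, iris.target, test_size=0.3, random_state=0)

    gnb = GaussianNB().fit(X_train, y_train)
    print(gnb.score(X_test, y_test))   # accuracy on unseen samples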
    
    
    from sklearn.naive_bayes import GaussianNB
    from sklearn.model_selection import cross_val_score

    # 10-fold cross-validated accuracy of Gaussian naive Bayes
    gnb = GaussianNB()
    scores = cross_val_score(gnb, iris.data, iris.target, cv=10)
    print("Accuracy:%.15f" % scores.mean())
    
    
    Accuracy:0.953333333333333
    
    scores
    array([0.93333333, 0.93333333, 1.        , 0.93333333, 0.93333333,
           0.93333333, 0.86666667, 1.        , 1.        , 1.        ])
    
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.model_selection import cross_val_score

    # 10-fold cross-validated accuracy of Bernoulli naive Bayes
    bnb = BernoulliNB()
    scores = cross_val_score(bnb, iris.data, iris.target, cv=10)
    print("Accuracy:%.3f" % scores.mean())
    
    Accuracy:0.333
    
    scores
    array([0.33333333, 0.33333333, 0.33333333, 0.33333333, 0.33333333,
           0.33333333, 0.33333333, 0.33333333, 0.33333333, 0.33333333])
    
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.model_selection import cross_val_score

    # 10-fold cross-validated accuracy of multinomial naive Bayes
    mnb = MultinomialNB()
    scores = cross_val_score(mnb, iris.data, iris.target, cv=10)
    print("Accuracy:%.15f" % scores.mean())
    
    Accuracy:0.953333333333333
    
    
    scores
    
    array([1.        , 1.        , 1.        , 0.93333333, 0.86666667,
           0.93333333, 0.8       , 1.        , 1.        , 1.        ])
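    The three cross-validation cells can also be written as one loop over the estimators, which makes the comparison easier to read (same data and cv=10 as above):

    from sklearn.model_selection import cross_val_score
    from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB

    # 10-fold cross-validated accuracy for each naive Bayes variant
    for clf in (GaussianNB(), BernoulliNB(), MultinomialNB()):
        scores = cross_val_score(clf, iris.data, iris.target, cv=10)
        print(clf.__class__.__name__, "%.3f" % scores.mean())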
    
    
    import csv

    # read the SMS spam collection: each line is "label<TAB>message"
    with open(r'd:/SMSSpamCollectionjsn.txt', encoding="utf-8") as file_path:
    # with open(r'C:\Users\Administrator\Desktop\SMSSpamCollection.csv', encoding="utf-8") as file_path:
        sms = file_path.read()

    sms_data = []
    sms_label = []
    # csv.reader needs an iterable of lines, not a single string
    reader = csv.reader(sms.splitlines(), delimiter='\t')
    for line in reader:
        sms_label.append(line[0])
        sms_data.append(line[1])
    sms_data
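    A quick sanity check on the parsed labels, assuming the file follows the usual SMSSpamCollection layout of label<TAB>message (this check is an addition, not part of the original post):

    from collections import Counter

    # total number of messages and how many are ham vs. spam
    print(len(sms_label), Counter(sms_label))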
    
    
    
    # replace full stops with spaces, then split the text into tokens
    cc = sms.replace('.', ' ')
    cclist = cc.split()
    print(len(cc), cclist)

    # unique tokens
    ccset = set(cclist)
    print(ccset)

    # count how many times each unique token occurs
    strDict = {}
    for star in ccset:
        strDict[star] = cclist.count(star)
    for key in ccset:
        print(key, strDict[key])

    # sort the (word, count) pairs by count, descending
    wclist = list(strDict.items())

    def takeSecond(elem):
        return elem[1]

    wclist.sort(key=takeSecond, reverse=True)
    print(wclist)
    
    
    
    ',', 'I', 'need', 'you,', 'I', 'crave', 'you', '...', 'But', 'most', 'of', 'all', '...', 'I', 'love', 'you', 'my', 'sweet', 'Arabian', 'steed', '...', 'Mmmmmm', '...', 'Yummy"', 'spam', '07732584351', '-', 'Rodger', 'Burns', '-', 'MSG', '=', 'We', 'tried', 'to', 'call', 'you', 're', 'your', 'reply', 'to', 'our', 'sms', 'for', 'a', 'free', 'nokia', 'mobile', '+', 'free', 'camcorder.', 'Please', 'call', 'now', '08000930705', 'for', 'delivery', 'tomorrow', 'ham', 'WHO', 'ARE', 'YOU', 'SEEING?', 'ham', 'Great!', 'I', 'hope', 'you', 'like', 'your', 'man', 'well', 'endowed.', 'I', 'am', '<#>', 'inches...', 'ham', 'No', 'calls..messages..missed', 'calls', 'ham', "Didn't", 'you', 'get', 'hep', 'b', 'immunisation', 'in', 'nigeria.', 'ham', '"Fair', 'enough,', 'anything', 'going', 'on?"', 'ham', '"Yeah', 'hopefully,', 'if', 'tyler', "can't", 'do', 'it', 'I', 'could', 'maybe', 'ask', 'around', 'a', 'bit"', 'ham', 'U', "don't", 'know', 'how', 'stubborn', 'I', 'am.', 'I', "didn't", 'even', 'want', 'to', 'go', 'to', 'the', 'hospital.', 'I', 'kept', 'telling', 'Mark', "I'm", 'not', 'a', 'weak', 'sucker.', 'Hospitals', 'are', 'for', 'weak', 'suckers.', 'ham', 'What', 'you', 'thinked', 'about', 'me.', 'First', 'time', 'you', 'saw', 'me', 'in', 'class.', 'ham', '"A', 'gram', 'usually', 'runs', 'like', '<#>', ',', 'a', 'half', 'eighth', 'is', 'smarter', 'though'
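    The manual dictionary counting above can also be done with collections.Counter, which counts whole tokens and yields the same sorted word-frequency list more idiomatically (an alternative sketch, not from the original post):

    from collections import Counter

    # count tokens and show the 20 most frequent ones
    word_counts = Counter(cclist)
    print(word_counts.most_common(20))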
    
    
    
    # NLTK's English stop-word list (requires a one-time nltk.download('stopwords'))
    from nltk.corpus import stopwords
    stops = stopwords.words('english')
    stops
    
    
    ['i',
     'me',
     'my',
     'myself',
     'we',
     'our',
     'ours',
     'ourselves',
     'you',
     "you're",
     "you've",
     "you'll",
     "you'd",
     'your',
     'yours',
     'yourself',
     'yourselves',
     'he',
     'him',
     'his',
     'himself',
     'she',
    
  • Original article: https://www.cnblogs.com/cc013/p/10028636.html