  • Naive Bayes models in sklearn and their applications

    from sklearn import datasets
    iris=datasets.load_iris()
    from sklearn.naive_bayes import GaussianNB
    gnb=GaussianNB()
    pred=gnb.fit(iris.data,iris.target)
    y_pred=pred.predict(iris.data)
    print(iris.data.shape[0],(iris.target!=y_pred).sum())
    
    150 6
    
    
    
    iris.target
    
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    
    
    
    y_pred
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
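
    GaussianNB gets 6 of the 150 training samples wrong, i.e. about 96% accuracy on the data it was trained on. For reference, the same check can be written with sklearn.metrics.accuracy_score (a small sketch added here; it was not part of the original run):

    from sklearn.metrics import accuracy_score
    # accuracy on the training data itself: 144 of 150 predictions are correct
    print(accuracy_score(iris.target,y_pred))   # ~0.96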
    
    
    
    from sklearn import datasets
    iris=datasets.load_iris()
    from sklearn.naive_bayes import BernoulliNB
    gnb=BernoulliNB()
    pred=gnb.fit(iris.data,iris.target)
    y_pred=pred.predict(iris.data)
    print(iris.data.shape[0],(iris.target!=y_pred).sum())
    
    
    150 100
    
    
    iris.target
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    
    
    y_pred
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
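
    BernoulliNB misclassifies 100 of the 150 samples: by default it binarizes every feature at a threshold of 0.0, and since all iris measurements are positive, every sample turns into the same all-ones vector and the model can only predict a single class. Raising the binarize threshold gives the features some variation again (a hedged sketch; the threshold of 3.0 is only an illustrative value, not a tuned one):

    from sklearn.naive_bayes import BernoulliNB
    # binarize=3.0: each measurement becomes 1 if it is above 3.0, else 0
    bnb=BernoulliNB(binarize=3.0)
    y_pred_b=bnb.fit(iris.data,iris.target).predict(iris.data)
    print(iris.data.shape[0],(iris.target!=y_pred_b).sum())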
    
    
    
    from sklearn import datasets
    iris=datasets.load_iris()
    from sklearn.naive_bayes import MultinomialNB
    gnb=MultinomialNB()
    pred=gnb.fit(iris.data,iris.target)
    y_pred=pred.predict(iris.data)
    print(iris.data.shape[0],(iris.target!=y_pred))
    
    150 [False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False  True False  True False
      True False False False False False False False False False False  True
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False False False False
     False False False False False False False False False  True False  True
     False  True False False False False False False False False False False
     False False False False False False]
    
    
    iris.target
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
    
    y_pred
    
    array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
           0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
           1, 1, 2, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1,
           1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
           2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 1,
           2, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
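
    To see which classes the multinomial model mixes up, a confusion matrix over the same predictions is a quick check (a reference sketch using sklearn.metrics, not part of the original output):

    from sklearn.metrics import confusion_matrix
    # rows are true classes, columns predicted classes;
    # the off-diagonal counts are the versicolor/virginica mix-ups visible above
    print(confusion_matrix(iris.target,y_pred))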
    
    
    from sklearn.naive_bayes import GaussianNB
    from sklearn.model_selection import cross_val_score
    gnb=GaussianNB()
    scores=cross_val_score(gnb,iris.data,iris.target,cv=10)
    print("Accuracy:%.15f"%scores.mean())
    
    
    Accuracy:0.953333333333333
    
    scores
    array([0.93333333, 0.93333333, 1.        , 0.93333333, 0.93333333,
           0.93333333, 0.86666667, 1.        , 1.        , 1.        ])
    
    from sklearn.naive_bayes import BernoulliNB
    from sklearn.model_selection import cross_val_score
    gnb=BernoulliNB()
    scores=cross_val_score(gnb,iris.data,iris.target,cv=10)
    print("Acdcuracy:%.3f"%scores.mean())
    
    Accuracy:0.333
    
    scores
    array([0.33333333, 0.33333333, 0.33333333, 0.33333333, 0.33333333,
           0.33333333, 0.33333333, 0.33333333, 0.33333333, 0.33333333])
    
    from sklearn.naive_bayes import MultinomialNB
    from sklearn.model_selection import cross_val_score
    gnb=MultinomialNB()
    scores=cross_val_score(gnb,iris.data,iris.target,cv=10)
    print("Acdcuracy:%.15f"%scores.mean())
    
    Accuracy:0.953333333333333
    
    
    scores
    
    array([1.        , 1.        , 1.        , 0.93333333, 0.86666667,
           0.93333333, 0.8       , 1.        , 1.        , 1.        ])
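
    The three cross-validation runs can also be gathered in one loop, which makes the comparison easier to read (a small sketch under the same cv=10 setting as above):

    from sklearn.naive_bayes import GaussianNB,BernoulliNB,MultinomialNB
    from sklearn.model_selection import cross_val_score
    for model in (GaussianNB(),BernoulliNB(),MultinomialNB()):
        scores=cross_val_score(model,iris.data,iris.target,cv=10)
        # Gaussian and multinomial average about 0.95, Bernoulli stays at 0.33
        print(model.__class__.__name__,"Accuracy:%.3f"%scores.mean())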
    
    
    import csv
    sms_data=[]
    sms_label=[]
    with open(r'd:/SMSSpamCollectionjsn.txt',encoding="utf-8") as file_path:
    # with open(r'C:\Users\Administrator\Desktop\SMSSpamCollection.csv','r',encoding='utf-8') as file_path:
        sms=file_path.read()                     # keep the full text for the word-count cell below
        # csv.reader needs an iterable of lines, not one long string
        reader=csv.reader(sms.splitlines(),delimiter='\t')
        for line in reader:
            sms_label.append(line[0])            # first column: ham / spam label
            sms_data.append(line[1])             # second column: message text
    # the with-block closes the file automatically, no explicit close() is needed
    sms_data
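
    A quick sanity check on the parsed lists (not in the original notebook) confirms that labels and message texts line up:

    # the number of labels should equal the number of messages
    print(len(sms_label),len(sms_data))
    print(sms_label[:5])    # 'ham' / 'spam' labels
    print(sms_data[:2])     # the corresponding message texts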
    
    
    
    cc=sms.replace('.',' ')                      # copy of the text with full stops removed
    cclist=sms.split()                           # split the raw text into words
    print(len(cc),cclist)
    ccset=set(cclist)                            # unique words
    print(ccset)
    strDict={}
    for star in ccset:
        strDict[star]=cclist.count(star)         # how often each word occurs
    for key in ccset:
        print(key,strDict[key])
    wclist=list(strDict.items())                 # (word, count) pairs
    print(wclist)
    def takeSecond(elem):
        return elem[1]
    wclist.sort(key=takeSecond,reverse=True)     # sort by frequency, descending
    print(wclist)
    
    
    
    ',', 'I', 'need', 'you,', 'I', 'crave', 'you', '...', 'But', 'most', 'of', 'all', '...', 'I', 'love', 'you', 'my', 'sweet', 'Arabian', 'steed', '...', 'Mmmmmm', '...', 'Yummy"', 'spam', '07732584351', '-', 'Rodger', 'Burns', '-', 'MSG', '=', 'We', 'tried', 'to', 'call', 'you', 're', 'your', 'reply', 'to', 'our', 'sms', 'for', 'a', 'free', 'nokia', 'mobile', '+', 'free', 'camcorder.', 'Please', 'call', 'now', '08000930705', 'for', 'delivery', 'tomorrow', 'ham', 'WHO', 'ARE', 'YOU', 'SEEING?', 'ham', 'Great!', 'I', 'hope', 'you', 'like', 'your', 'man', 'well', 'endowed.', 'I', 'am', '<#>', 'inches...', 'ham', 'No', 'calls..messages..missed', 'calls', 'ham', "Didn't", 'you', 'get', 'hep', 'b', 'immunisation', 'in', 'nigeria.', 'ham', '"Fair', 'enough,', 'anything', 'going', 'on?"', 'ham', '"Yeah', 'hopefully,', 'if', 'tyler', "can't", 'do', 'it', 'I', 'could', 'maybe', 'ask', 'around', 'a', 'bit"', 'ham', 'U', "don't", 'know', 'how', 'stubborn', 'I', 'am.', 'I', "didn't", 'even', 'want', 'to', 'go', 'to', 'the', 'hospital.', 'I', 'kept', 'telling', 'Mark', "I'm", 'not', 'a', 'weak', 'sucker.', 'Hospitals', 'are', 'for', 'weak', 'suckers.', 'ham', 'What', 'you', 'thinked', 'about', 'me.', 'First', 'time', 'you', 'saw', 'me', 'in', 'class.', 'ham', '"A', 'gram', 'usually', 'runs', 'like', '<#>', ',', 'a', 'half', 'eighth', 'is', 'smarter', 'though'
    
    
    
    from nltk.corpus import stopwords
    stops=stopwords.words('english')
    stops
    
    
    ['i',
     'me',
     'my',
     'myself',
     'we',
     'our',
     'ours',
     'ourselves',
     'you',
     "you're",
     "you've",
     "you'll",
     "you'd",
     'your',
     'yours',
     'yourself',
     'yourselves',
     'he',
     'him',
     'his',
     'himself',
     'she',
    