zoukankan      html  css  js  c++  java
  • sklearn 朴素贝叶斯

    sklearn 中的朴素贝叶斯
    # Naive Bayes on the iris data set with scikit-learn: fit each classifier on
    # the full data set and report how many training points it mislabels.
    from sklearn import datasets
    from sklearn.naive_bayes import GaussianNB
    from sklearn.naive_bayes import MultinomialNB

    iris = datasets.load_iris()

    # Gaussian naive Bayes.
    gnb = GaussianNB()
    y_pred = gnb.fit(iris.data, iris.target).predict(iris.data)
    print("Number of mislabeled points out of a total %d points : %d"
          % (iris.data.shape[0], (iris.target != y_pred).sum()))

    # Multinomial naive Bayes with additive smoothing. alpha defaults to 1
    # (Laplace smoothing); 0.5 is used here.
    clf = MultinomialNB(alpha=0.5)
    # Fit once and reuse the fitted model; refitting on identical data before
    # every prediction call only repeats the same work.
    clf.fit(iris.data, iris.target)
    y_pred1 = clf.predict(iris.data)
    print("Number of mislabeled points out of a total %d points : %d"
          % (iris.data.shape[0], (iris.target != y_pred1).sum()))

    # Per-class posterior probabilities and their logarithms.
    a1 = clf.predict_proba(iris.data)
    a2 = clf.predict_log_proba(iris.data)

    PR 曲线、ROC 曲线与 AUC 得分

    # Plot the ROC curve and the precision-recall curve for a file of
    # "<label> <score>" rows whose path is given as the first command-line argument.
    print(__doc__)

    import sys
    import numpy as np
    import matplotlib.pyplot as plt
    from sklearn.metrics import roc_curve, auc, precision_recall_curve, roc_auc_score

    if len(sys.argv) < 2:
        sys.exit("usage: %s <label-score-file>" % sys.argv[0])
    inputfile = sys.argv[1]

    label_list = []
    score_list = []
    with open(inputfile, 'r') as fd:
        for line in fd:
            # Split on any run of whitespace so tab- and space-separated files
            # both parse; skip blank lines instead of failing on int('').
            fs = line.split()
            if not fs:
                continue
            label_list.append(int(fs[0]))
            score_list.append(float(fs[1]))

    # ROC curve: false positive rate (fpr) against true positive rate (tpr).
    fpr, tpr, _ = roc_curve(label_list, score_list)
    # Keep the result under its own name so the imported auc() is not shadowed.
    roc_auc = auc(fpr, tpr)
    # The AUC score computed directly from the labels and scores.
    auc_score = roc_auc_score(label_list, score_list)
    # Precision-recall curve.
    precision, recall, _ = precision_recall_curve(label_list, score_list)

    ##############################################################################
    # Plot of a ROC curve for a specific class
    plt.figure()
    # Curves carry labels so that plt.legend() has entries to show.
    plt.plot(fpr, tpr, label='ROC curve')
    plt.plot([0, 1], [0, 1], 'k--', label='chance')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC curve (auc = %.2f)' % roc_auc)
    plt.legend(loc="lower right")
    plt.show()

    plt.figure()
    plt.plot(recall, precision, label='Precision-Recall curve')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('recall')
    plt.ylabel('precision')
    plt.title('Precision-Recall curve')
    plt.legend(loc="lower right")
    plt.show()

    计算auc的方法(按得分排序后统计正负样本对)

    import numpy as np
    import matplotlib.pyplot as plt
    def load_labeled_scores(path):
        """Read tab-separated ``label<TAB>score`` rows from *path*.

        Blank lines are skipped. Returns a float array with one row per sample,
        column 0 holding the label and column 1 the score.
        """
        rows = []
        with open(path) as handle:
            for line in handle:
                if not line.strip():
                    continue
                fields = line.split('\t')
                rows.append([float(fields[0]), float(fields[1])])
        return np.array(rows, dtype=float).reshape(-1, 2)


    def rank_auc(labels, scores):
        """Return the ROC AUC by counting mis-ordered (positive, negative) pairs.

        Labels equal to 1 are positives and labels equal to -1 are negatives;
        any other label is ignored. A pair is mis-ordered when the positive
        scores below the negative, and a tied pair counts as half a
        mis-ordering. The result is one minus the mis-ordered fraction.

        Raises:
            ValueError: if either class has no samples, since the AUC is then
                undefined.
        """
        labels = np.asarray(labels, dtype=float)
        scores = np.asarray(scores, dtype=float)
        positives = np.sort(scores[labels == 1])
        negatives = scores[labels == -1]
        if positives.size == 0 or negatives.size == 0:
            raise ValueError(
                "AUC needs at least one positive (1) and one negative (-1) sample")
        # For every negative: how many positives score strictly below it, and
        # how many score at or below it. The difference is the number of ties.
        strictly_below = np.searchsorted(positives, negatives, side='left')
        at_or_below = np.searchsorted(positives, negatives, side='right')
        misordered = strictly_below.sum() + 0.5 * (at_or_below - strictly_below).sum()
        return 1.0 - misordered / float(positives.size * negatives.size)


    filepath = '/home/hadoop/bigdata/nb/roi_auc_test/auc.raw'

    if __name__ == "__main__":
        samples = load_labeled_scores(filepath)
        print(rank_auc(samples[:, 0], samples[:, 1]))

    根据定义计算auc

    import numpy as np
    import matplotlib.pyplot as plt
    def load_labeled_scores(path):
        """Read tab-separated ``label<TAB>score`` rows from *path*.

        Blank lines are skipped. Returns a float array with one row per sample,
        column 0 holding the label and column 1 the score.
        """
        rows = []
        with open(path) as handle:
            for line in handle:
                if not line.strip():
                    continue
                fields = line.split('\t')
                rows.append([float(fields[0]), float(fields[1])])
        return np.array(rows, dtype=float).reshape(-1, 2)


    def threshold_sweep_auc(labels, scores, n_thresholds=5000):
        """Return the ROC AUC from its definition by sweeping a decision threshold.

        Labels equal to 1 are positives and labels equal to -1 are negatives;
        any other label is ignored. ``n_thresholds + 1`` evenly spaced
        thresholds between the minimum and maximum score are tried; a sample is
        predicted positive when its score is strictly greater than the
        threshold. The resulting (FPR, TPR) points are integrated with the
        trapezoid rule.

        Raises:
            ValueError: if either class has no samples, since TPR or FPR is
                then undefined.
        """
        labels = np.asarray(labels, dtype=float)
        scores = np.asarray(scores, dtype=float)
        positives = np.sort(scores[labels == 1])
        negatives = np.sort(scores[labels == -1])
        if positives.size == 0 or negatives.size == 0:
            raise ValueError(
                "AUC needs at least one positive (1) and one negative (-1) sample")

        thresholds = np.linspace(scores.min(), scores.max(), n_thresholds + 1)
        # Samples scoring strictly above each threshold are predicted positive.
        tp = positives.size - np.searchsorted(positives, thresholds, side='right')
        fp = negatives.size - np.searchsorted(negatives, thresholds, side='right')

        # The lowest threshold equals the minimum score, so the "everything
        # predicted positive" corner (1, 1) is never produced by the sweep; add
        # it (and the (0, 0) corner) explicitly. Rates fall as the threshold
        # rises, so reversing gives FPR in ascending order while keeping equal-FPR
        # points in their correct TPR order.
        fpr = np.concatenate(([1.0], fp / float(negatives.size), [0.0]))[::-1]
        tpr = np.concatenate(([1.0], tp / float(positives.size), [0.0]))[::-1]

        # Trapezoid rule over consecutive ROC points.
        return float(np.sum(np.diff(fpr) * (tpr[:-1] + tpr[1:]) / 2.0))


    filepath = '/home/hadoop/bigdata/nb/roi_auc_test/auc.raw'

    if __name__ == "__main__":
        samples = load_labeled_scores(filepath)
        area = threshold_sweep_auc(samples[:, 0], samples[:, 1], n_thresholds=5000)
        print(area)
  • 相关阅读:
    PHP运行及语句及逻辑
    数据库基础,表及SQL语句
    php后台修改人员表信息
    php后台增加删除修改跳转页面
    用PHP访问数据库
    php登录注册页面及加载
    php做登录注册页面及加载
    实现基于物理的渲染
    Tile-Based Deferred Rendering
    矩阵基础 2
  • 原文地址:https://www.cnblogs.com/skyturtle/p/10178876.html
Copyright © 2011-2022 走看看