zoukankan      html  css  js  c++  java
  • 多算法融合2_SVM

    #支持向量机算法原理及实现
    #(一)sklearn中利用SVM算法解决分类问题
    import numpy as np
    import matplotlib.pyplot as plt

    #1-1 多算法融合思想的使用——KNN算法参数寻优
    from sklearn.feature_selection import SelectKBest
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import KFold #交叉验证Kfold方式
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score #导入整体模型的准确度
    from sklearn.metrics import confusion_matrix #导入整体模型的混淆矩阵
    from sklearn.metrics import precision_score #导入整体模型的精准率
    from sklearn.metrics import recall_score #导入整体模型的召回率
    from sklearn.metrics import f1_score
    #利用管道pipeline来进行多项式核函数的SVM算法三步—多项式回归特征增加-数据归一化-线性SVM算法
    from sklearn.preprocessing import PolynomialFeatures #输入多项式回归模型
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import LinearSVC
    from sklearn.pipeline import Pipeline
    # Load the data set used for training from the Excel export.
    finaldata = pd.read_excel("C:/Users/y50014900/Desktop/过程测试_033GRR10L4105623_20200601-20200708_IL_DM_异常检测分类结果.xlsx")

    # Column names "p1" .. "p23".
    # NOTE(review): `feature` is not used to build x below; x is taken by
    # position (columns 2..70) instead. Confirm which selection is intended.
    feature = [f"p{n}" for n in range(1, 24)]
    DM_target1 = ["DM1"]
    DM_target2 = ["DM2"]

    # Feature matrix: positional column slice 2:71 of the sheet.
    x = finaldata.iloc[:, 2:71]
    print(x)
    # Convert to a 2-D NumPy array so every estimator gets the same input form.
    x = np.array(x)

    # Target vector: flatten the single label column to a 1-D array.
    # The original assigned y from DM_target1 and then immediately overwrote it
    # from DM_target2, so DM2 is the label that was actually used. That dead
    # first assignment is dropped here. Use DM_target1 instead to train on DM1.
    y = finaldata[DM_target2].values.ravel()

    # Convert the data to NumPy form so it is ready to be fed to the algorithms.
    # The first step is to standardise the data (linear scaling).
    #
    # NOTE(review): everything between the triple quotes below is a bare string
    # literal that is never assigned or used, so none of the statements inside
    # it are executed. It holds earlier experiments kept for reference:
    #   * StandardScaler on x followed by an 80/20 train_test_split,
    #   * LinearSVC with C=1e10 and with C=1,
    #   * a PolynomialFeatures -> StandardScaler -> LinearSVC pipeline swept over
    #     degree in range(1,3) and C in range(1,10),
    #   * a StandardScaler -> SVC(kernel="poly") pipeline swept over degree in
    #     range(1,5) and C in range(1,10).
    # Things to fix before re-enabling any of it:
    #   * function bodies and loop bodies inside the string have lost their
    #     indentation, so the text is not valid Python as it stands;
    #   * the inline comment on polyniomailSVC says the default C is 1, but the
    #     signature uses C=10;
    #   * the inline comment on the polyniomailSVC call mentions cubic features,
    #     but the degree comes from the loop over range(1,3);
    #   * the pipeline step name "std_canler" looks like a typo of "std_scaler".
    '''
    from sklearn.preprocessing import StandardScaler
    s1=StandardScaler()
    s1.fit(x)
    x_standard=s1.transform(x)
    from sklearn.model_selection import train_test_split
    x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)

    #1-1导入sklearn中SVM的线性分类算法LinearSVC,处理原有的线性数据
    from sklearn.preprocessing import StandardScaler
    s1=StandardScaler()
    s1.fit(x)
    x=s1.transform(x)
    from sklearn.model_selection import train_test_split
    x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
    from sklearn.svm import LinearSVC
    s11=LinearSVC(C=1e10) #需要定义超参数C,L1、L2正则化的系数,越大,容错空间越小
    #对于多分类问题的实现,需要提交参数penalty=l1/l2(正则化方式)以及multi_class=ovo/ovr(采用何种方式多分类训练)
    #LinearSVC默认方式为L2正则化,多分类为ovr模式

    s11.fit(x_train,y_train) #训练数据集训练归一化数据集
    print(s11.score(x_test,y_test))

    #改变正则化的系数C的大小,C越小,容错空间越大
    s12=LinearSVC(C=1) #C变小之后,容错空间增大,会有部分数据区分错误
    s12.fit(x_train,y_train) #训练数据集训练归一化数据集
    print(s12.score(x_test,y_test))

    #1-2 sklearn中对于非线性数据的svm应用(多项式应用方式)
    #SVM使用非线性数据假设的模型-手动添加多项式特征模型
    #利用管道pipeline来进行多项式核函数的SVM算法三步—多项式回归特征增加-数据归一化-线性SVM算法
    from sklearn.preprocessing import PolynomialFeatures #输入多项式回归模型
    from sklearn.preprocessing import StandardScaler
    from sklearn.svm import LinearSVC
    from sklearn.pipeline import Pipeline
    from sklearn.model_selection import train_test_split
    x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
    def polyniomailSVC(degree,C=10): #默认正则化系数C为1
    return Pipeline([("poly",PolynomialFeatures(degree=degree)),
    ("std_scaler",StandardScaler()),
    ("LinearSVC",LinearSVC(C=C))
    ])
    for i in range(1,3):
    for C in range(1,10):
    p=polyniomailSVC(degree=i,C=C) #使用三次的多项式特征进行模型的训练
    p.fit(x_train,y_train)
    print(p.score(x_test,y_test))



    #1-3 使用自带的多项式核函数的SVM,将数据先直接转换为多项式的多维特征,和传统的多项式特征不同
    #2直接利用sklearn中自带的多项式核函数SVM算法,可以自动添加多项式的特征,主要的参数kernel="poly"
    from sklearn.model_selection import train_test_split
    x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=666)
    from sklearn.svm import SVC
    def polynomialkernelSVC(degree,C=1.0):
    return Pipeline(
    [
    ("std_canler",StandardScaler()),
    ("kernelsvc",SVC(kernel="poly",degree=degree,C=C))
    ]
    )
    for i in range(1,5):
    for j in range(1,10):
    p1=polynomialkernelSVC(degree=i,C=j)
    p1.fit(x_train,y_train)
    print(p1.score(x_test,y_test))
    '''

    # 1-4 SVM with a Gaussian (RBF) kernel: training a model on non-linear data.
    # Uses scikit-learn's RBF kernel; its main hyper-parameter is gamma, which
    # controls model complexity (the larger gamma is, the more the model tends
    # to overfit).
    from sklearn.svm import SVC
    from sklearn.pipeline import Pipeline
    from sklearn.model_selection import train_test_split
    import numpy as np

    # Hold out 20% of the samples for testing; the seed is fixed so the split
    # is the same on every run.
    x_train, x_test, y_train, y_test = train_test_split(
        x,
        y,
        test_size=0.2,
        random_state=666,
    )
    def RBFkernelSVC(gamma, C=1.0):
        """Build an unfitted pipeline: standardisation followed by an RBF-kernel SVC.

        Args:
            gamma: RBF kernel coefficient, passed straight to ``SVC(gamma=...)``.
            C: Regularisation parameter passed to ``SVC``. Defaults to 1.0, which
                is scikit-learn's own default, so calls that omit it behave
                exactly as before.

        Returns:
            A ``Pipeline`` with the steps ``"std"`` (``StandardScaler``) and
            ``"svc"`` (``SVC(kernel="rbf")``).
        """
        return Pipeline([
            ("std", StandardScaler()),
            ("svc", SVC(kernel="rbf", gamma=gamma, C=C)),
        ])
    # Sweep gamma over 0.1, 1.1, ..., 9.1 (start 0.1, step 1, stop before 10).
    # For each value, fit a fresh RBF pipeline on the training split and print
    # its score on the test split, one number per line.
    for gamma in np.arange(0.1, 10, 1):
        sv = RBFkernelSVC(gamma=gamma)
        sv.fit(x_train, y_train)
        print(sv.score(x_test, y_test))

  • 相关阅读:
    CORS
    ant design vue table 选择当前数据,要如下传
    Web Components
    slot-scope Element-ui 的 slot 关系理解
    Node.js child_process模块中的spawn和exec方法
    node.js关于sendFile的路径问题,以及与send的区别
    uni-app使用uni.onShareAppMessage不生效
    小程序地理定位qqmap-wx-jssdk.js
    L1-009 N个数求和
    L1-008 求整数段和
  • 原文地址:https://www.cnblogs.com/Yanjy-OnlyOne/p/13403964.html
Copyright © 2011-2022 走看看