zoukankan      html  css  js  c++  java
  • Python大数据第三次的作业

    ---恢复内容开始---

    1.逻辑回归

    # Logistic regression on the credit-default data set (Default.csv).
    import pandas as pd
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import train_test_split

    # Load the data
    default = pd.read_csv('Default.csv')
    print(default)

    ## Preprocessing
    ## Numeric encoding of the Yes/No columns.
    ## ``map`` is used instead of ``replace`` so the result is a numeric column
    ## rather than an object column that depends on pandas' deprecated silent
    ## downcasting; any value other than 'No'/'Yes' becomes NaN.
    for item in ['student', 'default']:
        default[item] = default[item].map({'No': 0, 'Yes': 1})
    print(default)

    ## Train/test split (done BEFORE scaling so that the test rows cannot
    ## influence the scaling parameters)
    X = default.drop('default', axis=1)
    y = default['default']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=10)

    ## Min-Max scaling: the minimum and range are learned from the training
    ## rows only and then applied unchanged to the test rows.
    scale_cols = ['balance', 'income']
    X_train = X_train.copy()  # explicit copies: avoid writing into a slice of X
    X_test = X_test.copy()
    col_min = X_train[scale_cols].min()
    col_range = X_train[scale_cols].max() - col_min
    X_train[scale_cols] = (X_train[scale_cols] - col_min) / col_range
    X_test[scale_cols] = (X_test[scale_cols] - col_min) / col_range
    print(X_train)

    # Build the model
    model_LR = LogisticRegression(class_weight='balanced', random_state=10)

    # Train the model
    model_LR.fit(X_train, y_train)

    # Evaluate the model: accuracy on the held-out test set.
    # Printed explicitly so the value is visible outside a notebook as well.
    model_eval = model_LR.score(X_test, y_test)
    print(model_eval)
    # Logistic regression on the caesarian data set.
    # Required packages
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import LogisticRegression
    from sklearn.metrics import classification_report
    import seaborn as sns

    # Data preparation: 'Caesarian' is the label, all other columns are features
    data = pd.read_csv('caesarian.csv')
    X = data.drop(columns='Caesarian')
    y = data['Caesarian']

    # Split into training and test sets (20% held out)
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=10
    )

    # Build and fit the logistic-regression model, then predict the test rows
    clf = LogisticRegression(
        class_weight='balanced',
        random_state=10,
        solver='sag',
    ).fit(x_train, y_train)
    y_pred = clf.predict(x_test)

    # Classification accuracy
    score = clf.score(x_test, y_test)
    print(score)

    # Classification report
    report = classification_report(y_test, y_pred)
    print(report)
    
    

    2.朴素贝叶斯

    #######################################
    # Multinomial naive Bayes on the caesarian data set.
    # Required packages
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.naive_bayes import MultinomialNB

    # Data preparation: 'Caesarian' is the label, all other columns are features
    data = pd.read_csv('caesarian.csv')
    X = data.drop(columns='Caesarian')
    y = data['Caesarian']

    # Split into training and test sets (20% held out)
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=10
    )

    # Build the model; ``fit`` returns the estimator itself, so ``clf`` and
    # ``MNB`` refer to the same fitted object
    MNB = MultinomialNB(alpha=0.01)
    MNB.fit(x_train, y_train)
    clf = MNB

    # Print the accuracy on the test set
    score = clf.score(x_test, y_test)
    print(score)
    
    
    #######################################
    # Support vector classifier: compare kernel functions on the caesarian data.
    # Required packages
    import pandas as pd
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.svm import SVC

    # Data preparation: pick the label column and the five feature columns
    data = pd.read_csv('caesarian.csv')
    X = data[['Age', 'Delivery Number', 'Delivery time', 'Blood of Pressure', 'Heart Problem']]
    y = data['Caesarian']

    # Split into training and test sets (20% held out)
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=10
    )

    # Try the kernels in this order: linear, polynomial, sigmoid, Gaussian (RBF);
    # one classifier is fitted per kernel and its test accuracy is collected
    kernels = ['linear', 'poly', 'sigmoid', 'rbf']
    kernel_scores = [
        SVC(kernel=name, random_state=10).fit(x_train, y_train).score(x_test, y_test)
        for name in kernels
    ]

    print(kernel_scores)

    3.支持向量机

    # Linear support vector classifier (LinearSVC) on the caesarian data set.
    # Required packages
    import pandas as pd
    import numpy as np
    from sklearn.model_selection import train_test_split
    from sklearn.svm import LinearSVC

    # Data preparation: label column plus the five feature columns
    data = pd.read_csv('caesarian.csv')
    feature = ['Age', 'Delivery Number', 'Delivery time', 'Blood of Pressure', 'Heart Problem']
    caesarian = data['Caesarian']
    traindata = data[feature]
    print(traindata.head(4))

    # Split into training and test sets (80% of the rows used for training)
    x_train, x_test, y_train, y_test = train_test_split(
        traindata, caesarian, train_size=0.8, random_state=10
    )

    # Build and train the model (``fit`` returns the fitted estimator)
    lsvm = LinearSVC(C=0.68, random_state=10).fit(x_train, y_train)

    # Evaluate the model: accuracy on the test set
    score = lsvm.score(x_test, y_test)
    print(score)
    # Linear-kernel SVC on the caesarian data set, followed by a dump of the
    # fitted model's support-vector attributes.
    # Required packages
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from sklearn.svm import SVC

    # Data preparation: 'Caesarian' is the label, all other columns are features
    data = pd.read_csv('caesarian.csv')
    y = data['Caesarian']
    X = data.drop('Caesarian', axis=1)

    # Split into training and test sets (20% held out)
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=10)

    # Configure the SVM model parameters and train it
    clf = SVC(kernel='linear', verbose=True)
    clf.fit(x_train, y_train)

    # Classification accuracy
    score = clf.score(x_test, y_test)
    print(score)

    # NOTE: the separator below is the escape sequence '\n\n' (two blank lines
    # before each dump); it must stay on one source line, otherwise the string
    # literal is unterminated and the script does not parse.

    # Support vectors
    SV = clf.support_vectors_
    print('\n\n', SV)

    # Indices of the support vectors of the positive and negative classes
    S = clf.support_
    print('\n\n', S)

    # Number of support vectors for each class
    NS = clf.n_support_
    print('\n\n', NS)

    # Hyperplane coefficients
    C = clf.coef_
    print('\n\n', C)

    4.K近邻

    # K-nearest-neighbours classification on the caesarian data set.
    # Required packages
    import pandas as pd
    import numpy as np

    # Data preparation: label column plus the five feature columns
    data = pd.read_csv('caesarian.csv')
    caesarian = data['Caesarian']
    feature = ['Age', 'Delivery Number', 'Delivery time', 'Blood of Pressure', 'Heart Problem']
    traindata = data[feature]

    # Split into training and test sets (80% of the rows used for training)
    from sklearn.model_selection import train_test_split
    x_train, x_test, y_train, y_test = train_test_split(traindata, caesarian, train_size=0.8, random_state=10)

    # Import the model. Only the class that is actually used is imported; a
    # wildcard import would dump every public sklearn.neighbors name into the
    # namespace and hide where KNeighborsClassifier comes from.
    from sklearn.neighbors import KNeighborsClassifier

    # Print the classification accuracy for k = 2, 4, 6, 8 neighbours
    K_scores = [
        KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train).score(x_test, y_test)
        for k in range(2, 10, 2)
    ]
    print(K_scores)

    5.决策树

    # Decision-tree classification on the caesarian data set.
    # pandas is imported here so this section runs on its own instead of
    # relying on an import made by an earlier section.
    import pandas as pd

    # Data preparation: label column plus the five feature columns
    data = pd.read_csv('caesarian.csv')
    caesarian = data['Caesarian']
    feature = ['Age', 'Delivery Number', 'Delivery time', 'Blood of Pressure', 'Heart Problem']
    traindata = data[feature]

    # Split into training and test sets (20% held out).
    # Explicit imports replace the former wildcard imports.
    from sklearn.model_selection import train_test_split
    x_train, x_test, y_train, y_test = train_test_split(traindata, caesarian, test_size=0.2, random_state=10)

    # Train the model
    from sklearn.tree import DecisionTreeClassifier
    DF_model = DecisionTreeClassifier(random_state=10)
    DF_model.fit(x_train, y_train)

    # Model prediction: keep the predicted labels under their own name so they
    # are not immediately overwritten by the accuracy value below
    y_pred = DF_model.predict(x_test)

    # Test-set accuracy, rounded to 4 decimal places
    result = round(DF_model.score(x_test, y_test), 4)
    print(result)

    # Inspect the importance of each feature and draw it as a horizontal bar
    # chart, sorted in ascending order.
    # NOTE(review): the chart title is Chinese text; presumably a CJK-capable
    # matplotlib font must be configured for it to render — confirm locally.
    subplot = pd.Series(DF_model.feature_importances_, index=x_train.columns).sort_values().plot(kind='barh', title='特征重要性')

    ---恢复内容结束---

  • 相关阅读:
    Effective C# Item6:明辨值类型和引用类型的使用场合
    Effective C# Item15:利用using和try/finally语句来清理资源
    Effective C# Item12:变量初始化器优于赋值语句
    Effective C# Item19:定义并实现接口优于继承类型
    Effective C# Item14:利用构造器链
    Effective C# Item18:实现标准Dispose模式
    Effective C# Item17:尽量减少装箱和拆箱
    Effective C# Item7:将值类型尽可能实现为具有常量性和原子性的类型
    Effective C# Item10:理解GetHashCode()方法的缺陷
    Effective C# Item20:明辨接口实现和虚方法重写
  • 原文地址:https://www.cnblogs.com/Wind-Flies/p/11694243.html
Copyright © 2011-2022 走看看