zoukankan      html  css  js  c++  java
  • GridSearchCV和RandomizedSearchCV调参

    1. GridSearchCV实际上可以看作用for循环依次代入每一组参数组合,再通过交叉验证比较哪一组参数的效果最优。

    使用GridSearchCV的模板:

    # Use scikit-learn to grid search the batch size and epochs
    import numpy
    from sklearn.model_selection import GridSearchCV
    from keras.models import Sequential
    from keras.layers import Dense
    from keras.wrappers.scikit_learn import KerasClassifier
    import pandas as pd
    import os
    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
    # Model factory handed to KerasClassifier through build_fn.
    def create_model(optimizer='adam'):
        """Build and compile the small binary classifier used by the search.

        The network takes 8 input features, has one hidden Dense layer with
        12 ReLU units, and ends in a single sigmoid output unit.

        Args:
            optimizer: Optimizer passed straight to ``compile``.

        Returns:
            The compiled ``Sequential`` model.
        """
        layers = [
            Dense(12, input_dim=8, activation='relu'),
            Dense(1, activation='sigmoid'),
        ]
        network = Sequential(layers)
        network.compile(
            loss='binary_crossentropy',
            optimizer=optimizer,
            metrics=['accuracy'],
        )
        return network
    # Seed NumPy's global random generator.
    seed = 7
    numpy.random.seed(seed)
    # Read the dataset from diabetes.csv.
    dataset = pd.read_csv('diabetes.csv')
    # Separate the eight feature columns (X) from the target column (Y).
    feature_columns = [
        'Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness',
        'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age',
    ]
    X = dataset[feature_columns]
    Y = dataset['Outcome']
    # Wrap the model factory so it can be used as a scikit-learn estimator.
    model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10, verbose=0)
    # Candidate optimizers; each one is a point of the search grid.
    optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
    param_grid = {'optimizer': optimizer}
    grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
    grid_result = grid.fit(X, Y)
    # Report the best candidate, then dump the fitted search object and its raw results.
    print(f"Best: {grid_result.best_score_:f} using {grid_result.best_params_}")
    print(grid_result)
    print('kkkk')  # marker line printed between the two dumps
    cv_results = grid_result.cv_results_
    print(cv_results)
    # One line per candidate: mean test score, its standard deviation, and the parameters.
    means = cv_results['mean_test_score']
    stds = cv_results['std_test_score']
    params = cv_results['params']
    for mean, stdev, param in zip(means, stds, params):
        print(f"{mean:f} ({stdev:f}) with: {param!r}")
    View Code

    参考:https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/

              https://blog.csdn.net/weixin_41988628/article/details/83098130

    2

    利用网格搜索和随机搜索对鸢尾花数据集上的KNN分类器进行调参:

    from sklearn.datasets import load_iris  # 自带的样本数据集
    from sklearn.neighbors import KNeighborsClassifier  # 要估计的是knn里面的参数,包括k的取值和样本权重分布方式
    import matplotlib.pyplot as plt  # 可视化绘图
    from sklearn.model_selection import GridSearchCV,RandomizedSearchCV  # 网格搜索和随机搜索
    import pandas as pd
    # Load the iris data from CSV and show its first rows and column names.
    iris = pd.read_csv('../data/iris.csv')
    print(iris.head())
    print(iris.columns)
    feature_names = ['Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width']
    X = iris[feature_names]  # the four measurement columns
    y = iris['Species']  # class label column

    # Search space: k from 1 to 30, and both neighbour-weighting schemes
    # ('uniform' or 'distance').
    k_range = range(1, 31)
    weight_options = ['uniform', 'distance']
    # The keys must match the classifier's constructor parameter names.
    param_grid = dict(n_neighbors=k_range, weights=weight_options)
    print(param_grid)

    # Base estimator whose n_neighbors / weights are tuned through param_grid.
    knn = KNeighborsClassifier(n_neighbors=5)
    
    
    # ================================ Grid search =======================================
    # Try every combination in param_grid, scoring each one by accuracy with
    # cv=10 cross-validation.
    grid = GridSearchCV(
        estimator=knn,
        param_grid=param_grid,
        cv=10,
        scoring='accuracy',
    )
    grid.fit(X, y)

    print('网格搜索-度量记录:', grid.cv_results_)  # per-candidate cross-validation details
    print('网格搜索-最佳度量值:', grid.best_score_)  # best score
    print('网格搜索-最佳参数:', grid.best_params_)  # dict of the parameters that achieved it
    print('网格搜索-最佳模型:', grid.best_estimator_)  # estimator configured with those parameters

    # Rebuild a classifier from the best parameters, fit it on all the data,
    # and classify one new sample.
    best_params = grid.best_params_
    knn = KNeighborsClassifier(
        n_neighbors=best_params['n_neighbors'],
        weights=best_params['weights'],
    )
    knn.fit(X, y)
    print(knn.predict([[3, 5, 4, 2]]))
    
    
    
    # ===================================== Randomized search ===========================================
    # Evaluate n_iter=10 parameter settings drawn from param_grid, scoring each
    # one by accuracy with cv=10 cross-validation; random_state is fixed at 5.
    rand = RandomizedSearchCV(knn, param_grid, cv=10, scoring='accuracy', n_iter=10, random_state=5)
    rand.fit(X, y)

    # Report the randomized search's own results: read from `rand`, not from the
    # earlier `grid` object.
    print('随机搜索-度量记录:', rand.cv_results_)  # per-candidate cross-validation details
    print('随机搜索-最佳度量值:', rand.best_score_)  # best score
    print('随机搜索-最佳参数:', rand.best_params_)  # dict of the parameters that achieved it
    print('随机搜索-最佳模型:', rand.best_estimator_)  # estimator configured with those parameters

    # Rebuild a classifier from the parameters the randomized search selected,
    # fit it on all the data, and classify one new sample.
    best_params = rand.best_params_
    knn = KNeighborsClassifier(n_neighbors=best_params['n_neighbors'], weights=best_params['weights'])
    knn.fit(X, y)
    print(knn.predict([[3, 5, 4, 2]]))
    
    
    # =====================================自定义度量===========================================
    from sklearn import metrics
    # Custom scoring callable.
    def scorerfun(estimator, X, y):
        """Score *estimator* as the accuracy of its predictions on X against y.

        Args:
            estimator: Object exposing ``predict``.
            X: Samples to predict.
            y: Reference labels for X.

        Returns:
            The value returned by ``metrics.accuracy_score``.
        """
        return metrics.accuracy_score(y, estimator.predict(X))
    
    # Run the randomized search with the custom scorer defined above: a callable
    # taking (estimator, X, y) is passed as `scoring` in place of the built-in
    # 'accuracy' string.
    rand = RandomizedSearchCV(knn, param_grid, cv=10, scoring=scorerfun, n_iter=10, random_state=5)
    rand.fit(X, y)

    # Print the best score of this search (read from `rand`, not from `grid`).
    print('随机搜索-最佳度量值:', rand.best_score_)
    View Code

    参考:https://blog.csdn.net/luanpeng825485697/article/details/79831703

  • 相关阅读:
    maven 历史版本下载地址
    eclipse 热部署
    在线代码练习
    Intellij热部署插件JRebel
    模拟数据生成器
    电脑读取U盘总提示格式化
    变形金刚
    slamdunk正在做菜
    丧心病狂的计数
    小明在工作
  • 原文地址:https://www.cnblogs.com/xxswkl/p/11072795.html
Copyright © 2011-2022 走看看