Getting started with Kaggle

    #!/usr/bin/env python
    # coding: utf-8
    
    # # Common sklearn methods
    
    # # # Standardization (needed when distances are used to measure similarity, or before PCA dimensionality reduction)
    
    # In[1]:
    
    
    from sklearn.preprocessing import StandardScaler
    # Fit on the training data only and reuse the same statistics on the
    # test data, so no test-set information leaks into the scaling
    scaler = StandardScaler().fit(data_train)
    data_train = scaler.transform(data_train)
    data_test = scaler.transform(data_test)
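

    # A quick sanity check (a sketch with made-up toy numbers): after standardization each column should have mean ≈ 0 and standard deviation ≈ 1.

    # In[ ]:


    import numpy as np
    X_demo = np.array([[1.0, 20.0], [3.0, 60.0], [5.0, 100.0]])  # toy data
    X_std = StandardScaler().fit_transform(X_demo)
    print(X_std.mean(axis=0))  # ~[0. 0.]
    print(X_std.std(axis=0))   # ~[1. 1.]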
    
    
    # In[ ]:
    
    
    from sklearn.preprocessing import MinMaxScaler
    # Rescale each feature to the [0, 1] range
    data = MinMaxScaler().fit_transform(data)
    
    
    # In[ ]:
    
    
    from sklearn.preprocessing import Normalizer
    # Scale each sample (row) to unit norm
    data = Normalizer().fit_transform(data)
    
    
    # In[ ]:
    
    
    from sklearn.preprocessing import Binarizer
    # Map values above the cut-off `epsilon` (chosen by you) to 1, the rest to 0
    data = Binarizer(threshold=epsilon).fit_transform(data)
    
    
    # # # Converting categorical features to numeric: keep the numeric features and one-hot encode the categories
    
    # In[ ]:
    
    
    from sklearn.feature_extraction import DictVectorizer
    vec = DictVectorizer(sparse=False)
    X_train = vec.fit_transform(X_train.to_dict(orient='records'))
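

    # The fitted vectorizer should be reused on the test set so both sets get the same one-hot columns (a sketch, assuming X_test is a DataFrame like X_train):

    # In[ ]:


    X_test = vec.transform(X_test.to_dict(orient='records'))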
    
    
    # # Chi-squared test
    
    # In[ ]:
    
    
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import chi2
    # Select the K best features and return the reduced data
    skb = SelectKBest(chi2, k=10).fit(X_train, Y_train)
    X_train = skb.transform(X_train)
    X_test = skb.transform(X_test)
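

    # To see which columns survived, `get_support` on the fitted selector returns the chosen features (indices with indices=True, otherwise a boolean mask):

    # In[ ]:


    print(skb.get_support(indices=True))  # indices of the 10 selected features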
    
    
    # # Mutual information (MIC)
    
    # In[2]:
    
    
    import numpy as np
    from sklearn.feature_selection import SelectKBest
    from minepy import MINE
    # MINE is not designed in a functional style, so wrap it: `mic` returns a
    # (score, p-value) pair, with the p-value fixed at a dummy 0.5
    def mic(x, y):
        m = MINE()
        m.compute_score(x, y)
        return (m.mic(), 0.5)
    # SelectKBest expects a score function returning (scores, p-values) arrays
    def mic_scores(X, Y):
        scores, pvalues = zip(*(mic(x, Y) for x in X.T))
        return np.array(scores), np.array(pvalues)
    # Select the K best features and return the reduced data
    SelectKBest(mic_scores, k=2).fit_transform(iris.data, iris.target)
    
    
    # # PCA dimensionality reduction

    # In[ ]:
    
    
    from sklearn.decomposition import PCA
    # Project the data onto its first two principal components
    estimator = PCA(n_components=2)
    X_pca = estimator.fit_transform(X_data)
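

    # It is worth checking how much variance the two components retain; the fitted PCA exposes this as explained_variance_ratio_:

    # In[ ]:


    print(estimator.explained_variance_ratio_)  # variance captured per component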
    
    
    # # Running the algorithms
    
    # In[ ]:
    
    
    from sklearn.model_selection import train_test_split
    # Hold out 25% of the samples as a test set
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=7)
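

    # For classification with imbalanced labels, passing stratify keeps the class proportions identical in both splits (a sketch of the same call):

    # In[ ]:


    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=7, stratify=Y)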
    
    
    # In[ ]:
    
    
    # The generic pattern: import an estimator, fit on the training set,
    # predict on the test set (AlgorithmXXX is a placeholder)
    from sklearn.XXXXXXX import AlgorithmXXX
    alg = AlgorithmXXX()
    alg.fit(X_train, Y_train)
    y_predict = alg.predict(X_test)
    
    
    # In[ ]:
    
    
    from sklearn.linear_model import SGDClassifier
    # Linear classifier trained with stochastic gradient descent
    sgd = SGDClassifier()
    
    
    # In[ ]:
    
    
    from sklearn.linear_model import SGDRegressor
    # Linear regressor trained with SGD; recent sklearn renamed the loss
    # 'squared_loss' to 'squared_error'
    sgd = SGDRegressor(loss='squared_error', penalty=None, random_state=7)
    
    
    # In[ ]:
    
    
    from sklearn.svm import SVR
    svr = SVR(kernel='linear')  # kernel can be 'linear', 'poly', or 'rbf'
    
    
    # In[ ]:
    
    
    from sklearn.svm import SVC
    svc = SVC(kernel='linear')  # same kernel options as SVR
    
    
    # In[ ]:
    
    
    from sklearn.naive_bayes import MultinomialNB
    # Naive Bayes for non-negative count features (e.g. word counts)
    mnb = MultinomialNB()
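

    # A minimal sketch of where MultinomialNB fits, using a made-up toy corpus and hypothetical labels: turn text into term counts, then classify.

    # In[ ]:


    from sklearn.feature_extraction.text import CountVectorizer
    corpus = ["cheap pills now", "meeting at noon", "cheap meds now"]  # toy texts
    labels = [1, 0, 1]  # hypothetical labels: 1 = spam, 0 = ham
    vec_text = CountVectorizer()
    counts = vec_text.fit_transform(corpus)
    mnb.fit(counts, labels)
    print(mnb.predict(vec_text.transform(["cheap pills at noon"])))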
    
    
    # In[ ]:
    
    
    from sklearn.tree import DecisionTreeClassifier
    # Cap the tree depth and the minimum samples per leaf to limit overfitting
    dtc = DecisionTreeClassifier(criterion='entropy', max_depth=3, min_samples_leaf=5)
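

    # After fitting, the learned splits can be inspected as plain text (a sketch that reuses the X_train/Y_train placeholders from above):

    # In[ ]:


    from sklearn.tree import export_text
    dtc.fit(X_train, Y_train)
    print(export_text(dtc))  # human-readable view of the tree's rules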
    
    
    # In[ ]:
    
    
    from sklearn.ensemble import RandomForestClassifier
    # Bagged ensemble of decision trees, with the same depth/leaf caps
    rfc = RandomForestClassifier(max_depth=3, min_samples_leaf=5)
    
    
    # In[ ]:
    
    
    from sklearn.ensemble import GradientBoostingClassifier
    # Boosted ensemble: trees are added sequentially to fix earlier errors
    gbc = GradientBoostingClassifier(max_depth=3, min_samples_leaf=5)
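

    # Tree ensembles expose per-feature importance scores after fitting (a sketch reusing the placeholders from above):

    # In[ ]:


    rfc.fit(X_train, Y_train)
    print(rfc.feature_importances_)  # one importance score per input feature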
    
    
    # In[ ]:
    
    
    from sklearn.ensemble import ExtraTreesClassifier
    # Like a random forest, but with extra-randomized split thresholds
    etr = ExtraTreesClassifier()
    
    
    # In[ ]:
    
    
    from sklearn import metrics
    accuracy_rate = metrics.accuracy_score(y_test, y_predict)
    # classification_report returns a string; print it for per-class
    # precision, recall and F1
    print(metrics.classification_report(y_test, y_predict, target_names=data.target_names))
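

    # A confusion matrix is a useful companion to the report: rows are true classes, columns are predicted classes.

    # In[ ]:


    print(metrics.confusion_matrix(y_test, y_predict))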
    
    
    # In[ ]:
    
    
    from sklearn.model_selection import KFold
    from sklearn.model_selection import cross_val_score
    # random_state only takes effect when shuffling is enabled
    kfold = KFold(n_splits=10, shuffle=True, random_state=7)
    # `model` can be any of the estimators constructed above
    result = cross_val_score(model, X, Y, cv=kfold)
    print("accuracy: %.3f (+/- %.3f)" % (result.mean(), result.std()))
    
    
Original post: https://www.cnblogs.com/2019-02-11/p/10674096.html