zoukankan      html  css  js  c++  java
  • kaggle House_Price_XGBoost

    kaggle House_Price_final

    代码

    import numpy as np
    import pandas as pd
    from sklearn.ensemble import RandomForestRegressor
    from sklearn.metrics import mean_absolute_error
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import Imputer
    from sklearn.linear_model import LinearRegression
    from sklearn.tree import DecisionTreeRegressor
    import matplotlib.pyplot as plt
    from sklearn.preprocessing import Imputer
    from xgboost import XGBRegressor
    
    # Locations of the Kaggle input files and of the submission file to write.
    # Raw strings keep the Windows backslashes literal (otherwise "\t" in
    # "\train.csv" / "\test.csv" would be read as a TAB character).
    train_path = r"C:\Users\cbattle\Desktop\train.csv"
    test_path = r"C:\Users\cbattle\Desktop\test.csv"
    out_path = r"C:\Users\cbattle\Desktop\out.csv"
    
    # Load the training and test tables, then report their (rows, columns).
    train, test = (pd.read_csv(csv_file) for csv_file in (train_path, test_path))
    for label, frame in (('train:', train), ('test :', test)):
        print(label, frame.shape)
    
    # Separate the target from the features. 'Id' is dropped from both
    # feature tables; 'SalePrice' is the column being predicted.
    y = train['SalePrice']
    X = train.drop(['Id', 'SalePrice'], axis='columns')
    Xtest = test.drop(['Id'], axis='columns')
    for label, table in (('X    :', X), ('y    :', y), ('Xtest:', Xtest)):
        print(label, table.shape)
    # Debug aid (disabled): list every feature column with its dtype.
    # for col in X:
    #     print(X[col].dtype,col)
    
    # Feature selection: keep int64/float64 columns, plus object (string)
    # columns with fewer than 10 distinct values in the training data --
    # presumably to keep the one-hot encoding below small.
    # The same column list is applied to the test table so both stay in step.
    key = [col for col in X
           if X[col].dtype in ['int64', 'float64']
           or (X[col].dtype == 'object' and X[col].nunique() < 10)
           ]
    X = X[key]
    Xtest = Xtest[key]

    # One-hot encoding: each remaining object column becomes one indicator
    # column per category. Shapes are printed before and after for inspection.
    print(X.shape, Xtest.shape)
    X = pd.get_dummies(X)
    Xtest = pd.get_dummies(Xtest)
    # join='left' keeps exactly the training columns, in training order:
    # indicator columns seen only in the test set are dropped, and columns
    # seen only in training appear in Xtest filled with NaN.
    X, Xtest = X.align(Xtest, join='left', axis=1)
    print(X.shape, Xtest.shape)
    
    # Fill missing values. The imputer is fitted on the training features
    # only and the same fitted statistics are then applied to both tables.
    # NOTE(review): `sklearn.preprocessing.Imputer` was removed in
    # scikit-learn 0.22; newer releases provide `sklearn.impute.SimpleImputer`.
    my_imputer = Imputer()
    my_imputer.fit(X)
    X = my_imputer.transform(X)
    Xtest = my_imputer.transform(Xtest)
    print(X.shape, Xtest.shape)
    
    # Decision-tree variant (disabled, kept for reference).
    # decisionTree = DecisionTreeRegressor()
    # decisionTree.fit(X,y)
    # ans = decisionTree.predict(Xtest)

    # XGBoost regressor with default hyper-parameters, trained on the full
    # training set; fit() returns the estimator, so construction and
    # training are chained. `ans` holds the predictions for the test rows.
    xgb = XGBRegressor().fit(X, y, verbose=False)
    ans = xgb.predict(Xtest)

    # Early-stopping variant (disabled, kept for reference). It refers to
    # train_X / train_y / val_X / val_y, which this script does not define.
    # my_model = XGBRegressor(n_estimators=1000)
    # my_model.fit(train_X, train_y, early_stopping_rounds=5, 
    #              eval_set=[(val_X, val_y)], verbose=False)
    # ans = my_model.predict(Xtest)
    
    # Output: build the submission table (test Id paired with its predicted
    # SalePrice) and write it as CSV without the DataFrame index column.
    myAns = pd.DataFrame({'Id': test['Id'], 'SalePrice': ans})
    # Write to the `out_path` defined with the input paths instead of a
    # second hard-coded copy of the same location.
    myAns.to_csv(out_path, index=False)
    print('ok')
    
  • 相关阅读:
    python_之无参装饰器_01
    一 :接口自动化之 Rquests封装
    python --字典基本用法
    hadoop集群通过web管理界面只显示一个节点
    Hadoop基础(十八):MapReduce框架原理(二)切片机制(二)
    Hadoop基础(十七):MapReduce框架原理(一)切片机制(一)
    Hadoop基础(十六):Hadoop序列化
    Hadoop基础(十五): MapReduce概述
    Hadoop基础(十四): HDFS 2.X新特性
    Hadoop基础(十三): DataNode
  • 原文地址:https://www.cnblogs.com/cbattle/p/8810851.html
Copyright © 2011-2022 走看看