zoukankan      html  css  js  c++  java
  • Titanic_test_one

    import pandas as pd
    import numpy as np
    import random as rnd
    
    import seaborn as sns
    import matplotlib.pyplot as plt
    %matplotlib inline
    
    from sklearn.linear_model import LogisticRegression
    from sklearn.svm import SVC, LinearSVC
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.naive_bayes import GaussianNB
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    
     
    
    # Load the train and test CSV files into DataFrames.
    # Raw string literals are required for these Windows paths: in a normal
    # literal "\U" begins a Unicode escape (a SyntaxError on Python 3) and the
    # "\t" in "\train.csv" / "\test.csv" would be read as a tab character.
    train_df = pd.read_csv(r'C:\Users\Liubotao\Desktop\train.csv')
    test_df = pd.read_csv(r'C:\Users\Liubotao\Desktop\test.csv')
    # Keep both frames in one list so later steps can loop over them together.
    combine = [train_df, test_df]
    
     
    
    # Report the frame shapes before any columns are removed.
    print("Before", train_df.shape, test_df.shape, combine[0].shape, combine[1].shape)

    # Remove the 'Ticket' and 'Cabin' columns from both frames. drop() returns
    # new frames, so `combine` is rebuilt to reference them.
    train_df = train_df.drop(['Ticket', 'Cabin'], axis=1)
    test_df = test_df.drop(['Ticket', 'Cabin'], axis=1)
    combine = [train_df, test_df]

    # Printed explicitly, mirroring the "Before" line: a bare tuple expression
    # is only echoed as the final expression of a notebook cell and does
    # nothing when the code runs as a script.
    print("After", train_df.shape, test_df.shape, combine[0].shape, combine[1].shape)
    
     
    
    # Derive a 'Title' column from Name: the run of letters that follows a
    # space and is terminated by a literal period (for example " Mr.").
    # The dot has to be escaped. Unescaped it matches any character, so a name
    # such as "Vander Planke, Mrs. Julius" would yield "Planke" rather than "Mrs".
    for dataset in combine:
        dataset['Title'] = dataset.Name.str.extract(r' ([A-Za-z]+)\.', expand=False)

    # Cross-tabulate the extracted titles against Sex. As a bare expression the
    # table is only displayed when this is the last expression of a notebook cell.
    pd.crosstab(train_df['Title'], train_df['Sex'])
    
     
    # Replace the continuous Fare value with an ordinal band 0-3 using the fixed
    # cut points 7.91 / 14.454 / 31 (each upper bound is inclusive).
    for dataset in combine:
        dataset.loc[dataset['Fare'] <= 7.91, 'Fare'] = 0
        dataset.loc[(dataset['Fare'] > 7.91) & (dataset['Fare'] <= 14.454), 'Fare'] = 1
        dataset.loc[(dataset['Fare'] > 14.454) & (dataset['Fare'] <= 31), 'Fare'] = 2
        dataset.loc[dataset['Fare'] > 31, 'Fare'] = 3
        # NOTE(review): astype(int) raises if any Fare is still NaN; confirm
        # that missing fares are filled before this step.
        dataset['Fare'] = dataset['Fare'].astype(int)

    # 'FareBand' is not created anywhere in the visible code, so an
    # unconditional drop raises KeyError. errors='ignore' removes the column
    # only when it is present.
    train_df = train_df.drop(['FareBand'], axis=1, errors='ignore')
    # drop() returned a new frame; rebuild `combine` so it does not keep
    # pointing at the previous train_df object.
    combine = [train_df, test_df]
    
     
    
    # Split the data into model inputs and the target column.
    # X_test drops PassengerId, so X_train must not keep it either; otherwise
    # the training and prediction matrices have different columns.
    # errors="ignore" keeps this working if PassengerId was already removed
    # from train_df by an earlier step.
    X_train = train_df.drop("Survived", axis=1).drop("PassengerId", axis=1, errors="ignore")
    Y_train = train_df["Survived"]
    X_test = test_df.drop("PassengerId", axis=1).copy()
    # NOTE(review): in the visible code Name and Title are still text columns
    # at this point; they need to be encoded or dropped before fitting.
    # Confirm that happens elsewhere.
    
     
    
    # Train a logistic-regression classifier on the training split (fit()
    # returns the estimator itself), then predict the target for the test split.
    logreg = LogisticRegression().fit(X_train, Y_train)
    Y_pred = logreg.predict(X_test)

    # Two-column result frame: the test PassengerId next to its prediction.
    submission = pd.DataFrame(
        {
            "PassengerId": test_df["PassengerId"],
            "Survived": Y_pred,
        }
    )
    

      

  • 相关阅读:
    CAP原理、一致性模型、BASE理论和ACID特性
    MyBatis双数据源配置
    MySQL中间件Atlas安装及使用
    MySQL主从切换
    MySQL定时逻辑备份
    MySQL主从搭建
    zabbix监控nginx
    SVN Files 的值“ < < < < < < < .mine”无效。路径中具有非法字符。
    ie8下table的colspan属性与max-with属性的显示错乱问题
    MVC 自定义异常错误页面处理
  • 原文地址:https://www.cnblogs.com/L-BT/p/9772775.html
Copyright © 2011-2022 走看看