zoukankan      html  css  js  c++  java
  • Keras 使用多层感知器 预测泰坦尼克 乘客 生还概率

    # coding: utf-8
    
    # In[6]:
    
    
    # -*- coding: utf-8 -*-  
    import urllib.request
    import os
    
    
    # In[7]:
    
    
    # Remote location of the Titanic spreadsheet and the local path it is
    # cached at; the download is skipped when the file already exists.
    url="http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic3.xls"
    filepath="data/titanic3.xls"
    if not os.path.isfile(filepath):
        # urlretrieve does not create missing parent directories, so make
        # sure the target folder exists before the first download.
        target_dir = os.path.dirname(filepath)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        result=urllib.request.urlretrieve(url,filepath)
        print('downloaded:',result)
    
    
    # In[8]:
    
    
    import numpy
    import pandas as pd
    
    
    # In[9]:
    
    
    # Load the whole spreadsheet and preview the first two rows.
    all_df = pd.read_excel(filepath)
    all_df.head(2)
    
    # Keep the label column plus the attributes used later on.
    cols = [
        'survived', 'name', 'pclass', 'sex', 'age',
        'sibsp', 'parch', 'fare', 'embarked',
    ]
    all_df = all_df[cols]
    all_df.head(2)
    
    # Number of missing values per column.
    all_df.isnull().sum()
    
    # The passenger name is not used as a feature.
    df = all_df.drop(['name'], axis='columns')
    
    # Fill missing ages and fares with the mean of their column.
    age_mean = df['age'].mean()
    fare_mean = df['fare'].mean()
    df['age'] = df['age'].fillna(age_mean)
    df['fare'] = df['fare'].fillna(fare_mean)
    
    # Encode sex as an integer: female -> 0, male -> 1.
    df['sex'] = df['sex'].map({'female': 0, 'male': 1}).astype(int)
    df.head(2)
    
    # One-hot encode the port of embarkation.
    x_OneHot_df = pd.get_dummies(data=df, columns=["embarked"])
    x_OneHot_df.head(2)
    
    # Continue with the underlying array of the encoded frame.
    ndarray = x_OneHot_df.values
    ndarray.shape
    ndarray[:2]
    
    # Column 0 is the label ('survived' is listed first in cols); every
    # remaining column is a feature.
    Features = ndarray[:, 1:]
    Label = ndarray[:, 0]
    
    Features.shape
    Features[:2]
    Label.shape
    Label[:2]
    
    
    # In[33]:
    
    
    from sklearn import preprocessing
    
    
    # In[34]:
    
    
    # Rescale every feature column into the range [0, 1].
    minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
    minmax_scale.fit(Features)
    scaledFeatures = minmax_scale.transform(Features)
    
    # Preview the scaled features next to the first few labels.
    scaledFeatures[:2]
    Label[:5]
    
    
    # In[38]:
    
    
    # Draw one uniform random number per row; rows below 0.8 go to the
    # training set and the rest to the test set.
    msk = numpy.random.rand(len(all_df)) < 0.8
    train_df = all_df.loc[msk]
    test_df = all_df.loc[~msk]
    
    # Report the size of each partition.
    print('total:', all_df.shape[0], 'train:', train_df.shape[0], 'test:', test_df.shape[0])
    
    
    # In[40]:
    
    
    def PreprocessData(raw_df, embarked_ports=('C', 'Q', 'S')):
        """Convert raw passenger rows into scaled features and labels.

        Drops the 'name' column, fills missing 'age' and 'fare' values with
        the column mean, encodes 'sex' as 0 (female) / 1 (male), one-hot
        encodes 'embarked' and min-max scales every feature into [0, 1].

        Args:
            raw_df: DataFrame whose first column is the label and which
                contains the 'name', 'sex', 'age', 'fare' and 'embarked'
                columns.
            embarked_ports: port codes that each get a one-hot column, in
                this order. Values outside this set (including missing
                ones) are encoded as all zeros.

        Returns:
            A tuple (scaledFeatures, Label): a float array with one row per
            passenger and the array holding the first column of the frame.

        Note:
            The scaler is fitted on the frame passed in, so each call scales
            relative to its own data.
        """
        df=raw_df.drop(['name'], axis=1)
        age_mean = df['age'].mean()
        df['age'] = df['age'].fillna(age_mean)
        fare_mean = df['fare'].mean()
        df['fare'] = df['fare'].fillna(fare_mean)
        df['sex']= df['sex'].map({'female':0, 'male': 1}).astype(int)

        # Pin the category set so every subset of the data produces the same
        # embarked_* columns, even when a port is absent from that subset.
        # Otherwise the feature count could differ between train and test.
        df['embarked'] = pd.Categorical(df['embarked'],
                                        categories=list(embarked_ports))
        x_OneHot_df = pd.get_dummies(data=df,columns=["embarked" ])

        # pandas >= 2.0 returns bool dummy columns, which turns the mixed
        # .values array into dtype object; cast so features and labels are
        # plain floats.
        ndarray = x_OneHot_df.values.astype('float64')
        Features = ndarray[:,1:]
        Label = ndarray[:,0]

        minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
        scaledFeatures=minmax_scale.fit_transform(Features)

        return scaledFeatures,Label
    
    
    # In[41]:
    
    
    # Run the preprocessing pipeline on each partition separately.
    train_Features, train_Label = PreprocessData(raw_df=train_df)
    test_Features, test_Label = PreprocessData(raw_df=test_df)
    
    # Preview the first two training rows and their labels.
    train_Features[:2]
    train_Label[:2]
    # coding: utf-8
    
    # In[1]:
    
    
    import numpy
    import pandas as pd
    from sklearn import preprocessing
    # Fix the NumPy seed so the random split below is reproducible.
    numpy.random.seed(10)
    
    # Load the spreadsheet and keep the label plus the attributes used.
    all_df = pd.read_excel("data/titanic3.xls")
    cols = [
        'survived', 'name', 'pclass', 'sex', 'age',
        'sibsp', 'parch', 'fare', 'embarked',
    ]
    all_df = all_df[cols]
    
    # Rows whose random draw is below 0.8 form the training set; the
    # remaining rows form the test set.
    msk = numpy.random.rand(len(all_df)) < 0.8
    train_df = all_df.loc[msk]
    test_df = all_df.loc[~msk]
    
    # Report the size of each partition.
    print('total:', all_df.shape[0], 'train:', train_df.shape[0], 'test:', test_df.shape[0])
    
    
    # In[6]:
    
    
    def PreprocessData(raw_df, embarked_ports=('C', 'Q', 'S')):
        """Convert raw passenger rows into scaled features and labels.

        Drops the 'name' column, fills missing 'age' and 'fare' values with
        the column mean, encodes 'sex' as 0 (female) / 1 (male), one-hot
        encodes 'embarked' and min-max scales every feature into [0, 1].

        Args:
            raw_df: DataFrame whose first column is the label and which
                contains the 'name', 'sex', 'age', 'fare' and 'embarked'
                columns.
            embarked_ports: port codes that each get a one-hot column, in
                this order. Values outside this set (including missing
                ones) are encoded as all zeros.

        Returns:
            A tuple (scaledFeatures, Label): a float array with one row per
            passenger and the array holding the first column of the frame.

        Note:
            The scaler is fitted on the frame passed in, so each call scales
            relative to its own data.
        """
        df=raw_df.drop(['name'], axis=1)
        age_mean = df['age'].mean()
        df['age'] = df['age'].fillna(age_mean)
        fare_mean = df['fare'].mean()
        df['fare'] = df['fare'].fillna(fare_mean)
        df['sex']= df['sex'].map({'female':0, 'male': 1}).astype(int)

        # Pin the category set so every subset of the data produces the same
        # embarked_* columns, even when a port is absent from that subset.
        # Otherwise the feature count could differ between train and test.
        df['embarked'] = pd.Categorical(df['embarked'],
                                        categories=list(embarked_ports))
        x_OneHot_df = pd.get_dummies(data=df,columns=["embarked" ])

        # pandas >= 2.0 returns bool dummy columns, which turns the mixed
        # .values array into dtype object; cast so features and labels are
        # plain floats.
        ndarray = x_OneHot_df.values.astype('float64')
        Features = ndarray[:,1:]
        Label = ndarray[:,0]

        minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
        scaledFeatures=minmax_scale.fit_transform(Features)

        return scaledFeatures,Label
    
    
    # In[7]:
    
    
    # Run the preprocessing pipeline on each partition separately.
    train_Features, train_Label = PreprocessData(raw_df=train_df)
    test_Features, test_Label = PreprocessData(raw_df=test_df)
    
    
    # In[8]:
    
    
    from keras.models import Sequential
    from keras.layers import Dense,Dropout
    
    
    # In[9]:
    
    
    # Multilayer perceptron: 9 inputs -> 40 ReLU units -> 30 ReLU units ->
    # a single sigmoid output.
    model = Sequential([
        Dense(units=40, input_dim=9,
              kernel_initializer='uniform', activation='relu'),
        Dense(units=30,
              kernel_initializer='uniform', activation='relu'),
        Dense(units=1,
              kernel_initializer='uniform', activation='sigmoid'),
    ])
    
    # Binary cross-entropy loss with the Adam optimizer; accuracy is
    # tracked as an additional metric.
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    
    # Train for 30 epochs in batches of 30, holding back 10% of the
    # training data for validation.
    train_history = model.fit(
        x=train_Features,
        y=train_Label,
        validation_split=0.1,
        epochs=30,
        batch_size=30,
        verbose=2,
    )
    
    
    # In[15]:
    
    
    import matplotlib.pyplot as plt
    def show_train_history(train_history,train,validation):
        """Plot a training metric and its validation counterpart per epoch.

        Args:
            train_history: object whose ``history`` attribute maps metric
                names to per-epoch sequences of values.
            train: name of the training metric, e.g. 'acc' or 'loss'.
            validation: name of the validation metric, e.g. 'val_acc'.

        Raises:
            KeyError: if a requested metric is not present under either
                spelling of its name.
        """
        history = train_history.history

        def _series(key):
            # Keras < 2.3 records metrics=['accuracy'] under 'acc', while
            # later versions record it under 'accuracy'. Accept either
            # spelling so the same call works across versions.
            if key not in history:
                if key.endswith('accuracy'):
                    alias = key[:-len('accuracy')] + 'acc'
                elif key.endswith('acc'):
                    alias = key[:-len('acc')] + 'accuracy'
                else:
                    alias = None
                if alias in history:
                    return history[alias]
            # Raises KeyError for the originally requested name.
            return history[key]

        plt.plot(_series(train))
        plt.plot(_series(validation))
        plt.title('Train History')
        plt.ylabel(train)
        plt.xlabel('Epoch')
        plt.legend(['train', 'validation'], loc='upper left')
        plt.show()
    
    
    # In[16]:
    
    
    # Accuracy curves for the training and validation data.
    show_train_history(train_history, train='acc', validation='val_acc')
    
    # Loss curves for the training and validation data.
    show_train_history(train_history, train='loss', validation='val_loss')
    
    # Evaluate on the held-out test partition.
    scores = model.evaluate(
        x=test_Features,
        y=test_Label,
    )
    
    # Second entry of the evaluation result (the model was compiled with
    # the accuracy metric after the loss).
    scores[1]
  • 相关阅读:
    @ControllerAdvice + @ExceptionHandler 使用
    将博客搬至CSDN
    Docker pull网络错误
    Centos7.5安装Docker
    Oracle18c创建不带C##的用户
    Centos7.5静默安装Oracle18c
    nodeJs和JavaScript的异同
    maven项目引入本地包,不使用中央仓库
    java中把指数形式的数字转为正常形式显示
    validateJarFile jar not loaded. See Servlet Spec 2.3, section 9.7.2. Offending class: javax/servlet/Servlet.class
  • 原文地址:https://www.cnblogs.com/IAMzhuxiaofeng/p/9136469.html
Copyright © 2011-2022 走看看