# coding: utf-8

# Notebook export: download the Titanic spreadsheet, walk through the
# preprocessing steps cell by cell, then wrap the same steps in
# PreprocessData() and apply it to a random ~80/20 train/test split.
# Bare expressions such as `all_df[:2]` are notebook display cells; they have
# no visible effect when this file is run as a plain script.

# In[6]:

# -*- coding: utf-8 -*-
import urllib.request
import os

# In[7]:

url="http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/titanic3.xls"
filepath="data/titanic3.xls"
if not os.path.isfile(filepath):
    # urlretrieve does not create missing parent directories, so make sure
    # the target folder exists before downloading into it.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    result=urllib.request.urlretrieve(url,filepath)
    print('downloaded:',result)

# In[8]:

import numpy
import pandas as pd

# In[9]:

all_df = pd.read_excel(filepath)

# In[10]:

all_df[:2]

# In[11]:

# 'survived' is deliberately first: later cells slice column 0 as the label.
cols=['survived','name','pclass' ,'sex', 'age',
      'sibsp', 'parch', 'fare', 'embarked']
all_df=all_df[cols]

# In[12]:

all_df[:2]

# In[13]:

all_df.isnull().sum()

# In[14]:

df=all_df.drop(['name'], axis=1)

# In[15]:

age_mean = df['age'].mean()
df['age'] = df['age'].fillna(age_mean)

# In[16]:

fare_mean = df['fare'].mean()
df['fare'] = df['fare'].fillna(fare_mean)

# In[17]:

df['sex']= df['sex'].map({'female':0, 'male': 1}).astype(int)

# In[18]:

df[:2]

# In[19]:

x_OneHot_df = pd.get_dummies(data=df,columns=["embarked" ])

# In[20]:

x_OneHot_df[:2]

# In[21]:

# Cast explicitly: recent pandas emits boolean dummy columns, and a frame that
# mixes bool with numeric columns would otherwise yield an object-dtype array.
ndarray = x_OneHot_df.values.astype(float)

# In[22]:

ndarray.shape

# In[23]:

ndarray[:2]

# In[26]:

Label = ndarray[:,0]
Features = ndarray[:,1:]

# In[27]:

Features.shape

# In[28]:

Features[:2]

# In[29]:

Label.shape

# In[32]:

Label[:2]

# In[33]:

from sklearn import preprocessing

# In[34]:

minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))

# In[35]:

scaledFeatures=minmax_scale.fit_transform(Features)

# In[36]:

scaledFeatures[:2]

# In[37]:

Label[:5]

# In[38]:

# Random row mask: each row lands in the training set with probability 0.8.
msk = numpy.random.rand(len(all_df)) < 0.8
train_df = all_df[msk]
test_df = all_df[~msk]

# In[39]:

print('total:',len(all_df),
      'train:',len(train_df),
      'test:',len(test_df))

# In[40]:

def PreprocessData(raw_df):
    """Turn a raw Titanic frame into scaled features and a label vector.

    Steps: drop the 'name' column, fill missing 'age' and 'fare' with the
    column mean of ``raw_df``, encode 'sex' as 0 (female) / 1 (male),
    one-hot encode 'embarked', then min-max scale every feature to [0, 1].

    Args:
        raw_df: DataFrame containing 'name', 'age', 'fare', 'sex' and
            'embarked'. The first remaining column after dropping 'name' is
            taken as the label (this script puts 'survived' there).

    Returns:
        A ``(scaledFeatures, Label)`` tuple of NumPy arrays: the scaled
        feature matrix and the label column.

    NOTE(review): the imputation means and the scaler are fit on ``raw_df``
    itself on every call, so separate train/test calls are scaled
    independently of each other.
    """
    df=raw_df.drop(['name'], axis=1)
    age_mean = df['age'].mean()
    df['age'] = df['age'].fillna(age_mean)
    fare_mean = df['fare'].mean()
    df['fare'] = df['fare'].fillna(fare_mean)
    df['sex']= df['sex'].map({'female':0, 'male': 1}).astype(int)
    x_OneHot_df = pd.get_dummies(data=df,columns=["embarked" ])

    # Force a float array so bool dummy columns (recent pandas) cannot turn
    # the whole matrix, and therefore the labels, into dtype=object.
    ndarray = x_OneHot_df.values.astype(float)
    Features = ndarray[:,1:]
    Label = ndarray[:,0]

    minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
    scaledFeatures=minmax_scale.fit_transform(Features)

    return scaledFeatures,Label

# In[41]:

train_Features,train_Label=PreprocessData(train_df)
test_Features,test_Label=PreprocessData(test_df)

# In[42]:

train_Features[:2]

# In[43]:

train_Label[:2]
# coding: utf-8

# Notebook export: preprocess the Titanic spreadsheet, train a small
# fully-connected Keras classifier on it, plot the training history and
# evaluate on the held-out rows.

# In[1]:

import numpy
import pandas as pd
from sklearn import preprocessing
# Fixed seed so the random train/test mask below is reproducible.
numpy.random.seed(10)

# In[2]:

all_df = pd.read_excel("data/titanic3.xls")

# In[3]:

# 'survived' is deliberately first: PreprocessData slices column 0 as label.
cols=['survived','name','pclass' ,'sex', 'age',
      'sibsp', 'parch', 'fare', 'embarked']
all_df=all_df[cols]

# In[4]:

# Random row mask: each row lands in the training set with probability 0.8.
msk = numpy.random.rand(len(all_df)) < 0.8
train_df = all_df[msk]
test_df = all_df[~msk]

# In[5]:

print('total:',len(all_df),
      'train:',len(train_df),
      'test:',len(test_df))

# In[6]:

def PreprocessData(raw_df):
    """Turn a raw Titanic frame into scaled features and a label vector.

    Steps: drop the 'name' column, fill missing 'age' and 'fare' with the
    column mean of ``raw_df``, encode 'sex' as 0 (female) / 1 (male),
    one-hot encode 'embarked', then min-max scale every feature to [0, 1].

    Args:
        raw_df: DataFrame containing 'name', 'age', 'fare', 'sex' and
            'embarked'. The first remaining column after dropping 'name' is
            taken as the label (this script puts 'survived' there).

    Returns:
        A ``(scaledFeatures, Label)`` tuple of NumPy arrays: the scaled
        feature matrix and the label column.

    NOTE(review): the imputation means and the scaler are fit on ``raw_df``
    itself on every call, so separate train/test calls are scaled
    independently of each other.
    """
    df=raw_df.drop(['name'], axis=1)
    age_mean = df['age'].mean()
    df['age'] = df['age'].fillna(age_mean)
    fare_mean = df['fare'].mean()
    df['fare'] = df['fare'].fillna(fare_mean)
    df['sex']= df['sex'].map({'female':0, 'male': 1}).astype(int)
    x_OneHot_df = pd.get_dummies(data=df,columns=["embarked" ])

    # Force a float array so bool dummy columns (recent pandas) cannot turn
    # the whole matrix, and therefore the labels fed to Keras, into
    # dtype=object.
    ndarray = x_OneHot_df.values.astype(float)
    Features = ndarray[:,1:]
    Label = ndarray[:,0]

    minmax_scale = preprocessing.MinMaxScaler(feature_range=(0, 1))
    scaledFeatures=minmax_scale.fit_transform(Features)

    return scaledFeatures,Label

# In[7]:

train_Features,train_Label=PreprocessData(train_df)
test_Features,test_Label=PreprocessData(test_df)

# In[8]:

from keras.models import Sequential
from keras.layers import Dense,Dropout

# In[9]:

model = Sequential()

# In[10]:

# input_dim=9: six numeric columns plus the one-hot 'embarked' columns
# (presumably three ports of embarkation -- verify against the data).
model.add(Dense(units=40, input_dim=9,
                kernel_initializer='uniform',
                activation='relu'))

# In[11]:

model.add(Dense(units=30,
                kernel_initializer='uniform',
                activation='relu'))

# In[12]:

# Single sigmoid unit: binary survived / not-survived output.
model.add(Dense(units=1,
                kernel_initializer='uniform',
                activation='sigmoid'))

# In[13]:

model.compile(loss='binary_crossentropy',
              optimizer='adam', metrics=['accuracy'])

# In[14]:

train_history =model.fit(x=train_Features,
                         y=train_Label,
                         validation_split=0.1,
                         epochs=30,
                         batch_size=30,verbose=2)

# In[15]:

import matplotlib.pyplot as plt

# Keras releases disagree on the history key for the accuracy metric
# ('acc' vs 'accuracy'); map each spelling to the other one.
_HISTORY_KEY_ALIASES = {
    'acc': 'accuracy',
    'val_acc': 'val_accuracy',
    'accuracy': 'acc',
    'val_accuracy': 'val_acc',
}


def _history_series(history, key):
    """Return history[key], falling back to the alternate accuracy spelling."""
    alias = _HISTORY_KEY_ALIASES.get(key)
    if key not in history and alias in history:
        key = alias
    # An unknown key still raises KeyError, exactly as a plain lookup would.
    return history[key]


def show_train_history(train_history,train,validation):
    """Plot a training metric and its validation counterpart per epoch.

    Args:
        train_history: object returned by ``model.fit``; its ``history``
            attribute is read as a mapping of metric name to per-epoch values.
        train: history key of the training metric (also used as y-label).
        validation: history key of the validation metric.

    Raises:
        KeyError: if a key (and its accuracy alias, if any) is not recorded.
    """
    recorded = train_history.history
    plt.plot(_history_series(recorded, train))
    plt.plot(_history_series(recorded, validation))
    plt.title('Train History')
    plt.ylabel(train)
    plt.xlabel('Epoch')
    plt.legend(['train', 'validation'], loc='upper left')
    plt.show()

# In[16]:

show_train_history(train_history,'acc','val_acc')

# In[17]:

show_train_history(train_history,'loss','val_loss')

# In[18]:

scores = model.evaluate(x=test_Features,
                        y=test_Label)

# In[19]:

scores[1]