# 2.1 Handle missing values: fill continuous columns with the column mean.
# Walks train_null_ix (presumably the training columns that contain nulls --
# confirm against where it is built) and only touches the entries that also
# appear in continuous_ix.
continuous_fillna_number = []
for i in train_null_ix:
    if i not in continuous_ix:
        continue
    column = df_train[i]
    mean_v = column.mean()
    # Record the fill value in iteration order; the saved array carries no
    # column names, so consumers must rely on this same ordering.
    continuous_fillna_number.append(mean_v)
    df_train[i] = column.fillna(mean_v)

# Persist the fill values so they can be reused at test time
# (also usable for incremental test runs).
np.save("continuous_fillna_number.npy", continuous_fillna_number)
# 2.2 Map NaN in categorical (discrete) columns to the "<unk>" token.
# Walks train_null_ix and only touches the entries that also appear in
# categorical_ix.
for i in train_null_ix:
    if i in categorical_ix:
        # Series.fillna returns a new Series rather than modifying in place,
        # so the result must be assigned back; otherwise the NaNs stay.
        # NOTE(review): if a column has pandas "category" dtype, "<unk>" may
        # need to be added to its categories first -- confirm the dtypes.
        df_train[i] = df_train[i].fillna("<unk>")