zoukankan html css js c++ java

缺失值处理

import pandas as pd
import numpy as np
from sklearn.preprocessing import Imputer
# Demo: detecting, dropping and imputing missing values in a DataFrame.

# Build a 6x4 frame of standard-normal samples, then blank out two cells
# so there is something to detect and fill.
df = pd.DataFrame(np.random.randn(6, 4), columns=['col1', 'col2', 'col3', 'col4'])
df.iloc[1:2, 1] = np.nan  # row 1 of col2
df.iloc[4, 3] = np.nan  # row 4 of col4
print(df)

# Cell-level view: boolean mask that is True wherever a value is missing.
nan_all = df.isnull()
print(nan_all)

# Column-level view.
nan_col1 = df.isnull().any()  # True for columns containing at least one NaN
nan_col2 = df.isnull().all()  # True for columns made up entirely of NaN
print(nan_col1)
print(nan_col2)

# Drop every row that contains a missing value.
df2 = df.dropna()
print(df2)

# Impute with scikit-learn: replace each NaN with the mean of its column.
# The keyword is `missing_values` (plural); `missing_value` raises TypeError.
# NOTE(review): `Imputer` was removed from sklearn.preprocessing in
# scikit-learn 0.22; on newer versions use sklearn.impute.SimpleImputer
# (which has no `axis` argument and takes missing_values=np.nan).
nan_model = Imputer(missing_values='NaN', strategy='mean', axis=0)
nan_result = nan_model.fit_transform(df)  # fit the column means and apply them
print(nan_result)

# Impute with pandas.
# bfill()/ffill() replace fillna(method=...), which is deprecated in
# pandas >= 2.1; the results are the same.
nan_result_pd1 = df.bfill()  # take the next valid value below
nan_result_pd2 = df.bfill(limit=1)  # same, filling at most 1 consecutive NaN
nan_result_pd3 = df.ffill()  # take the last valid value above
nan_result_pd4 = df.fillna(0)  # constant fill
nan_result_pd5 = df.fillna({'col2': 1.1, 'col4': 1.2})  # per-column constants
# Per-column mean fill, restricted to the columns col2..col4.
nan_result_pd6 = df.fillna(df.mean()['col2':'col4'])

# Show the results of each pandas strategy.
print(nan_result_pd1)
print(nan_result_pd2)
print(nan_result_pd3)
print(nan_result_pd4)
print(nan_result_pd5)
print(nan_result_pd6)

import pandas as pd
import numpy as np
from sklearn.preprocessing import Imputer
# Demo: detecting, dropping and imputing missing values in a DataFrame.

# Generate sample data: 6 rows x 4 columns of random normals, with two
# cells overwritten by NaN to simulate missing observations.
df = pd.DataFrame(np.random.randn(6, 4), columns=['col1', 'col2', 'col3', 'col4'])
df.iloc[1:2, 1] = np.nan  # row 1 of col2
df.iloc[4, 3] = np.nan  # row 4 of col4
print(df)

# Which cells are missing? (boolean frame, True = NaN)
nan_all = df.isnull()
print(nan_all)

# Which columns are affected?
nan_col1 = df.isnull().any()  # columns with at least one NaN
nan_col2 = df.isnull().all()  # columns where every value is NaN
print(nan_col1)
print(nan_col2)

# Discard all rows that have any missing value.
df2 = df.dropna()
print(df2)

# Replace missing values with scikit-learn, using the column mean.
# The keyword must be spelled `missing_values`; the singular form
# `missing_value` is rejected with TypeError.
# NOTE(review): `Imputer` no longer exists in scikit-learn >= 0.22; the
# replacement is sklearn.impute.SimpleImputer (no `axis` argument,
# missing_values=np.nan).
nan_model = Imputer(missing_values='NaN', strategy='mean', axis=0)
nan_result = nan_model.fit_transform(df)  # learn the means, then transform
print(nan_result)

# Replace missing values with pandas.
# fillna(method=...) is deprecated in pandas >= 2.1, so the dedicated
# bfill()/ffill() methods are used; they give the same output.
nan_result_pd1 = df.bfill()  # backward fill: use the following valid value
nan_result_pd2 = df.bfill(limit=1)  # backward fill, at most 1 consecutive NaN
nan_result_pd3 = df.ffill()  # forward fill: use the preceding valid value
nan_result_pd4 = df.fillna(0)  # fill with zero
nan_result_pd5 = df.fillna({'col2': 1.1, 'col4': 1.2})  # a different value per column
# Fill col2..col4 with their own column means.
nan_result_pd6 = df.fillna(df.mean()['col2':'col4'])

# Print each filled frame.
print(nan_result_pd1)
print(nan_result_pd2)
print(nan_result_pd3)
print(nan_result_pd4)
print(nan_result_pd5)
print(nan_result_pd6)

查看全文

相关阅读:
Codeforces 371D Vessels
HDU1272小希的迷宫–并查集
 golang：exported function Script should have comment or be unexported
动态规划--0,1背包问题（再也不怕类似背包问题了）
golang数据结构之稀疏数组
 向github中已创建好的repository提交文件
 java（二）变量
 使用Git上传文件到github
java（一）基础知识
 pytorch--基础类型之间的转换

原文地址：https://www.cnblogs.com/qiuyuyu/p/10059194.html