# Load the CSV, build a combined ID column, and write the result back out.
# NOTE(review): r_f1 / w_f1 are not defined in this snippet -- presumably the
# input and output CSV paths; confirm against the surrounding script.
df = pd.read_csv(r_f1)

# Combine the user ID with the knowledge encoding using `+`.
df['machine_Id'] = df['UserId'] + df['knowledge_encoding']

# Keep the original ID under "old_UserId" and expose the combined value as
# the new "UserId" column.
df.rename(columns={"UserId": "old_UserId", "machine_Id": "UserId"}, inplace=True)

df.to_csv(w_f1, index=False)
把其中的 int 类型列转成字符串后再拼接（例如把 year、month、day 拼成日期字符串）
# Stringify each date component, then join them as "year/month/day".
year_text = df['year'].map(str)
month_text = df['month'].map(str)
day_text = df['day'].map(str)
df['date'] = year_text + "/" + month_text + "/" + day_text
将两列映射成字典
# Map column "a" (keys) to column "b" (values) as a plain dict.
df.set_index("a")["b"].to_dict()  # output {1: 'q', 2: 'q', 3: 'w'}
将三列映射成嵌套字典
from collections import defaultdict

import pandas as pd

# Sample frame with three columns: id, sex, name.
df = pd.DataFrame(
    [[1, "girl", "lisa"], [2, "girl", "luxi"], [3, "boy", "alika"], [4, "boy", "join"]],
    columns=["id", "sex", "name"],
)

# Build a nested mapping of the form {id: {sex: name}}.
result = defaultdict(dict)
# `row_id` rather than `id`, so the builtin id() is not shadowed.
for row_id, sex, name in df.itertuples(index=False):
    result[row_id][sex] = name

print(result)
# defaultdict(<class 'dict'>, {1: {'girl': 'lisa'}, 2: {'girl': 'luxi'}, 3: {'boy': 'alika'}, 4: {'boy': 'join'}})
把df中a列的空值用b列的值做补充
# Fill missing values in column "a" with the value from column "b" on the
# same row. A single column assignment is used instead of chained indexing
# (df['a'][mask] = ...), which raises SettingWithCopyWarning and does not
# update df when pandas Copy-on-Write is enabled.
df['a'] = df['a'].fillna(df['b'])