"""Tutorial: why pandas emits SettingWithCopyWarning, and two ways to avoid it.

The script walks through four steps:

0. Load the 2018 Beijing weather CSV and turn the temperature columns into ints.
1. Reproduce the warning with a chained assignment.
2. Explain the cause.
3. Fix 1: write through a single ``.loc`` call on the source DataFrame.
4. Fix 2: take an explicit ``.copy()`` of the subset and modify the copy.

Key rule: a write to a DataFrame must happen in ONE step on the DataFrame that
owns the data. Either use ``.loc`` on the source, or copy the subset first and
then modify the copy.
"""

import pandas as pd

CSV_PATH = "beijing_tianqi_2018.csv"
TEMPERATURE_COLUMNS = ("bWendu", "yWendu")


def clean_temperature_columns(df: pd.DataFrame) -> pd.DataFrame:
    """Strip the "℃" suffix from the temperature columns and cast them to int32.

    The DataFrame is modified in place and also returned for convenience.
    """
    for column in TEMPERATURE_COLUMNS:
        # Plain column assignment replaces the whole column, so the dtype
        # really becomes int32. On pandas 2.0+ `df.loc[:, column] = ...`
        # writes into the existing object column in place and would leave
        # the dtype unchanged.
        df[column] = df[column].str.replace("℃", "", regex=False).astype("int32")
    return df


def load_weather(path=CSV_PATH) -> pd.DataFrame:
    """Step 0: read the weather CSV and clean its temperature columns."""
    return clean_temperature_columns(pd.read_csv(path))


def add_temperature_diff(df: pd.DataFrame, condition: pd.Series) -> pd.DataFrame:
    """Fix 1: set "wen_cha" (high minus low) on the selected rows via one .loc call.

    Row selection and column assignment happen in a single indexing operation
    on the source DataFrame, so there is no intermediate object to write to.
    Rows outside ``condition`` get NaN in the new column.
    """
    df.loc[condition, "wen_cha"] = df["bWendu"] - df["yWendu"]
    return df


def subset_with_temperature_diff(df: pd.DataFrame, condition: pd.Series) -> pd.DataFrame:
    """Fix 2: return an independent copy of the selected rows with "wen_cha" added.

    Use this when the filtered data is needed for further processing; the
    source DataFrame is left untouched.
    """
    subset = df[condition].copy()
    subset["wen_cha"] = subset["bWendu"] - subset["yWendu"]
    return subset


def main() -> None:
    """Run the tutorial steps in order and print the intermediate results."""
    # 0 Load the data.
    df = load_weather()

    # 1 Reproduce the problem.
    # Select only the March data for the analysis.
    condition = df["ymd"].str.startswith("2018-03")

    # Set the temperature difference with a chained assignment.
    # DELIBERATE ANTI-PATTERN, kept only to demonstrate the warning.
    df[condition]["wen_cha"] = df["bWendu"] - df["yWendu"]

    # Check whether the modification succeeded (it did not).
    print(df[condition].head())

    # Output captured from the original notebook run:
    #   D:Anacondalibsite-packagesipykernel_launcher.py:6: SettingWithCopyWarning:
    #   A value is trying to be set on a copy of a slice from a DataFrame.
    #   Try using .loc[row_indexer,col_indexer] = value instead
    #   See the caveats in the documentation:
    #   http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy

    # 2 Cause.
    # The statement that triggers the warning,
    #     df[condition]["wen_cha"] = df["bWendu"] - df["yWendu"]
    # is equivalent to df.get(condition).set(wen_cha): a get followed by a set.
    # The DataFrame returned by the get step may be a view or a copy, so
    # pandas cannot tell whether the set reaches the source data and warns.
    # Key rule: writes to a DataFrame are only allowed on the source
    # DataFrame, in a single step.

    # 3 Fix 1: one .loc call on the source DataFrame.
    add_temperature_diff(df, condition)
    print(df[condition].head())

    # 4 Fix 2: if the filtered data is needed for further processing,
    # copy the subset first and modify the copy.
    df_month3 = subset_with_temperature_diff(df, condition)
    print(df_month3.head())

    # In short, pandas does not allow filtering a sub-DataFrame and then
    # writing to it. Either modify the source DataFrame in one step with
    # .loc, or copy the sub-DataFrame first and modify the copy in one step.


if __name__ == "__main__":
    main()