import pandas as pd

if __name__ == '__main__':
    # Load the workbook that contains repeated student records.
    workbook = "C:/Users/18124/Desktop/pandas/020_定位_消除重复数据/副本Students_Duplicates.xlsx"
    student = pd.read_excel(workbook, engine="openpyxl")
    print(student)

    # Alternative de-duplication variants, kept for reference:
    #   1. Match on a single column (the first occurrence is kept):
    #        student.drop_duplicates(subset="Name", inplace=True)
    #   2. Match on several columns at once:
    #        student.drop_duplicates(subset=["Name", "Test_1", "Test_2"], inplace=True)

    # 3. Match on the "Name" column only and drop the earlier occurrences;
    #    keep="last" retains the final row of every group of duplicates.
    student = student.drop_duplicates(subset="Name", keep="last")
    print(student)
import pandas as pd


def find_duplicate_rows(frame: pd.DataFrame, subset) -> pd.DataFrame:
    """Return the rows of *frame* that repeat an earlier row on *subset*.

    Args:
        frame: The table to inspect.
        subset: Column label, or list of column labels, used to decide
            whether two rows count as duplicates.

    Returns:
        A DataFrame holding every row flagged by ``DataFrame.duplicated``
        (i.e. all occurrences except the first one), with the original
        index labels preserved. Empty when there are no duplicates.
    """
    mask = frame.duplicated(subset=subset)
    # Select with the boolean mask itself. Feeding the mask's index labels
    # to ``iloc`` only works when the index happens to be 0..n-1; with any
    # other index it picks the wrong rows or raises IndexError.
    return frame[mask]


if __name__ == '__main__':
    student = pd.read_excel(
        "C:/Users/18124/Desktop/pandas/020_定位_消除重复数据/副本Students_Duplicates.xlsx",
        engine="openpyxl",
    )
    print(student)

    # 1. Check whether any duplicated name exists at all.
    dupe = student.duplicated(subset="Name")
    print(dupe.any())  # True when at least one row is flagged as a duplicate

    # 2. Show the duplicated rows themselves.
    print(find_duplicate_rows(student, "Name"))