Pandas——比较两个dataframe之间的区别

zoukankan html css js c++ java

Pandas——比较两个dataframe之间的区别
import pandas as pd
import datacompy

# Use the datacompy library to report the differences between two DataFrames.
# Because the inputs are loaded with read_csv, the same approach also works
# for comparing two plain-text files.
# header=None: the files have no header row, so pandas labels the column 0.
df1 = pd.read_csv("1.txt", header=None)
df2 = pd.read_csv("22.txt", header=None)

# Rows are matched on column 0 (the integer label assigned by header=None).
dd = datacompy.Compare(df1, df2, join_columns=0)
print(dd.report())
结果

DataComPy Comparison
--------------------

DataFrame Summary
-----------------

DataFrame Columns Rows
0 df1 1 2
1 df2 1 6

Column Summary
--------------

Number of columns in common: 1
Number of columns in df1 but not in df2: 0
Number of columns in df2 but not in df1: 0

Row Summary
-----------

Matched on: 0
Any duplicates on match values: No
Absolute Tolerance: 0
Relative Tolerance: 0
Number of rows in common: 2
Number of rows in df1 but not in df2: 0
Number of rows in df2 but not in df1: 4

Number of rows with some compared columns unequal: 0
Number of rows with all compared columns equal: 2

Column Comparison
-----------------

Number of columns compared with some values unequal: 0
Number of columns compared with all values equal: 1
Total number of values which compare unequal: 0

Sample Rows Only in df2 (First 10 Columns)
------------------------------------------

0
3 vasdj
4 顺嘿嘿
5 顺顺
2 afdlkaewlhg
import glob
import os

import pandas as pd
import datacompy

# Compare every .txt file in the first folder against the file with the same
# name in the second folder, and save each datacompy report next to the script.
# NOTE(review): the backslashes in these Windows paths were lost when the
# snippet was published; they are restored here — confirm the folder names.
all_files1 = glob.glob(r"C:\Users\15773\Desktop\test1\*.txt")
all_files2 = glob.glob(r"C:\Users\15773\Desktop\test2\*.txt")

# Index the second folder by file name so each lookup is a dict access
# instead of a full scan of all_files2 for every file in all_files1.
files2_by_name = {os.path.basename(path): path for path in all_files2}

for file1 in all_files1:
    file1_basename = os.path.basename(file1)
    file2 = files2_by_name.get(file1_basename)
    if file2 is None:
        # No counterpart with the same name in the second folder.
        continue

    # header=None: the files have no header row, so the column is labelled 0.
    df1 = pd.read_csv(file1, header=None)
    df2 = pd.read_csv(file2, header=None)

    # Rows are matched on column 0.
    dd = datacompy.Compare(df1, df2, join_columns=0)
    report = dd.report()
    print(df1)
    print(df2)
    print(report)

    txt_name = file1_basename + "_result.txt"
    # The context manager closes the file even if the write fails; UTF-8 keeps
    # non-ASCII cell values in the report writable regardless of the OS locale.
    with open(txt_name, "w", encoding="utf-8") as result_txt:
        result_txt.write(report)

print("process done")
from io import StringIO

import pandas as pd
import datacompy

# Two small in-memory CSV tables: one record per line, header on the first line.
data1 = """acct_id,dollar_amt,name,float_fld,date_fld
10000001234,123.45,George Maharis,14530.1555,2017-01-01
10000001235,0.45,Michael Bluth,1,2017-01-01
10000001236,1345,George Bluth,,2017-01-01
10000001237,123456,Bob Loblaw,345.12,2017-01-01
10000001239,1.05,Lucille Bluth,,2017-01-01
"""

data2 = """acct_id,dollar_amt,name,float_fld
10000001234,123.4,George Michael Bluth,14530.155
10000001235,0.45,Michael Bluth,
10000001236,1345,George Bluth,1
10000001237,123456,Robert Loblaw,345.12
10000001238,1.05,Loose Seal Bluth,111
"""

df1 = pd.read_csv(StringIO(data1))
df2 = pd.read_csv(StringIO(data2))

compare = datacompy.Compare(
    df1,
    df2,
    join_columns='acct_id',  # You can also specify a list of columns
    abs_tol=0,  # Optional, defaults to 0
    rel_tol=0,  # Optional, defaults to 0
    df1_name='Original',  # Optional, defaults to 'df1'
    df2_name='New',  # Optional, defaults to 'df2'
)
compare.matches(ignore_extra_columns=False)
# False

# This method prints out a human-readable report summarizing and sampling differences
print(compare.report())
DataComPy Comparison
--------------------

DataFrame Summary
-----------------

DataFrame Columns Rows
0 Original 5 5
1 New 4 5

Column Summary
--------------

Number of columns in common: 4
Number of columns in Original but not in New: 1
Number of columns in New but not in Original: 0

Row Summary
-----------

Matched on: acct_id
Any duplicates on match values: No
Absolute Tolerance: 0
Relative Tolerance: 0
Number of rows in common: 4
Number of rows in Original but not in New: 1
Number of rows in New but not in Original: 1

Number of rows with some compared columns unequal: 4
Number of rows with all compared columns equal: 0

Column Comparison
-----------------

Number of columns compared with some values unequal: 3
Number of columns compared with all values equal: 1
Total number of values which compare unequal: 6

Columns with Unequal Values or Types
------------------------------------

Column Original dtype New dtype # Unequal Max Diff # Null Diff
2 dollar_amt float64 float64 1 0.0500 0
0 float_fld float64 float64 3 0.0005 2
1 name object object 2 0.0000 0

Sample Rows with Unequal Values
-------------------------------

acct_id float_fld (Original) float_fld (New)
2 10000001236 NaN 1.000
0 10000001234 14530.1555 14530.155
1 10000001235 1.0000 NaN

acct_id name (Original) name (New)
3 10000001237 Bob Loblaw Robert Loblaw
0 10000001234 George Maharis George Michael Bluth

acct_id dollar_amt (Original) dollar_amt (New)
0 10000001234 123.45 123.4

Sample Rows Only in Original (First 10 Columns)
-----------------------------------------------

acct_id dollar_amt name float_fld date_fld
4 10000001239 1.05 Lucille Bluth NaN 2017-01-01

Sample Rows Only in New (First 10 Columns)
------------------------------------------

acct_id dollar_amt name float_fld
5 10000001238 1.05 Loose Seal Bluth 111.0
查看全文

相关阅读:
整合Spring与Hibernate
基本正则
 vue权限指令
 vue数字动态转换大写
 element ui 表格动态生成多级表头、一级表头，可无限嵌套
 vuex和vue-router全家桶
 element表格内容过多title提示
 HBuilder打包App方法
 mui底部选项卡切换页面
 mui框架的底部选项卡公用加载对应页面demo

原文地址：https://www.cnblogs.com/shunguo/p/14567902.html