比较两个不同的数据框可以通过以下步骤来实现:(1)读取两个数据框;(2)按 `id` 列进行外连接合并,并用后缀区分左右两侧的同名列;(3)依次比较 `var1`、`var2`、`var3`,标记第一个出现差异的列;(4)筛选出存在差异的行。
示例代码如下:
import numpy as np
import pandas as pd


def _values_differ(left: pd.Series, right: pd.Series) -> pd.Series:
    """Return a boolean mask that is True where two aligned Series differ.

    A plain ``left != right`` reports NaN as different from NaN, so a value
    that is missing on both sides would be flagged as a difference. Such
    positions are treated as equal here.
    """
    both_missing = left.isna() & right.isna()
    return (left != right) & ~both_missing


def mark_differences(
    merged_df: pd.DataFrame,
    columns=("var1", "var2", "var3"),
    suffixes=("_left", "_right"),
) -> np.ndarray:
    """Label each row of a merged frame with the first column that differs.

    Args:
        merged_df: Result of merging two frames so that every compared
            column appears twice, once per suffix.
        columns: Base names of the columns to compare, in priority order.
        suffixes: The (left, right) suffixes that were used in the merge.

    Returns:
        An array with one label per row: ``'<column>_diff'`` for the first
        column in ``columns`` whose two sides differ, or ``'no_diff'`` when
        all compared columns agree.
    """
    left_suffix, right_suffix = suffixes
    conditions = [
        _values_differ(
            merged_df[f"{name}{left_suffix}"],
            merged_df[f"{name}{right_suffix}"],
        ).to_numpy()
        for name in columns
    ]
    labels = [f"{name}_diff" for name in columns]
    # np.select takes the first condition that holds for each row, which is
    # the same priority order as a nested np.where chain.
    return np.select(conditions, labels, default="no_diff")


if __name__ == "__main__":
    # Read the two data frames to compare.
    df1 = pd.read_csv('data1.csv')
    df2 = pd.read_csv('data2.csv')

    # Outer-merge on 'id' so rows present in only one frame are kept, then
    # record which column (if any) differs between the two sides.
    merged_df = pd.merge(
        df1, df2, on='id', how='outer', suffixes=('_left', '_right')
    )
    merged_df['diff_data'] = mark_differences(merged_df)

    # Keep only the rows where a difference was found.
    diff_df = merged_df[merged_df['diff_data'] != 'no_diff']
    print(diff_df)