test_assert_frame_equal.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. # -*- coding: utf-8 -*-
  2. import pytest
  3. from pandas import DataFrame
  4. from pandas.util.testing import assert_frame_equal
  5. @pytest.fixture(params=[True, False])
  6. def by_blocks(request):
  7. return request.param
  8. def _assert_frame_equal_both(a, b, **kwargs):
  9. """
  10. Check that two DataFrame equal.
  11. This check is performed commutatively.
  12. Parameters
  13. ----------
  14. a : DataFrame
  15. The first DataFrame to compare.
  16. b : DataFrame
  17. The second DataFrame to compare.
  18. kwargs : dict
  19. The arguments passed to `assert_frame_equal`.
  20. """
  21. assert_frame_equal(a, b, **kwargs)
  22. assert_frame_equal(b, a, **kwargs)
  23. def _assert_not_frame_equal(a, b, **kwargs):
  24. """
  25. Check that two DataFrame are not equal.
  26. Parameters
  27. ----------
  28. a : DataFrame
  29. The first DataFrame to compare.
  30. b : DataFrame
  31. The second DataFrame to compare.
  32. kwargs : dict
  33. The arguments passed to `assert_frame_equal`.
  34. """
  35. try:
  36. assert_frame_equal(a, b, **kwargs)
  37. msg = "The two DataFrames were equal when they shouldn't have been"
  38. pytest.fail(msg=msg)
  39. except AssertionError:
  40. pass
  41. def _assert_not_frame_equal_both(a, b, **kwargs):
  42. """
  43. Check that two DataFrame are not equal.
  44. This check is performed commutatively.
  45. Parameters
  46. ----------
  47. a : DataFrame
  48. The first DataFrame to compare.
  49. b : DataFrame
  50. The second DataFrame to compare.
  51. kwargs : dict
  52. The arguments passed to `assert_frame_equal`.
  53. """
  54. _assert_not_frame_equal(a, b, **kwargs)
  55. _assert_not_frame_equal(b, a, **kwargs)
  56. @pytest.mark.parametrize("check_like", [True, False])
  57. def test_frame_equal_row_order_mismatch(check_like):
  58. df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]},
  59. index=["a", "b", "c"])
  60. df2 = DataFrame({"A": [3, 2, 1], "B": [6, 5, 4]},
  61. index=["c", "b", "a"])
  62. if not check_like: # Do not ignore row-column orderings.
  63. msg = "DataFrame.index are different"
  64. with pytest.raises(AssertionError, match=msg):
  65. assert_frame_equal(df1, df2, check_like=check_like)
  66. else:
  67. _assert_frame_equal_both(df1, df2, check_like=check_like)
  68. @pytest.mark.parametrize("df1,df2", [
  69. (DataFrame({"A": [1, 2, 3]}), DataFrame({"A": [1, 2, 3, 4]})),
  70. (DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}), DataFrame({"A": [1, 2, 3]})),
  71. ])
  72. def test_frame_equal_shape_mismatch(df1, df2):
  73. msg = "DataFrame are different"
  74. with pytest.raises(AssertionError, match=msg):
  75. assert_frame_equal(df1, df2)
  76. @pytest.mark.parametrize("df1,df2,msg", [
  77. # Index
  78. (DataFrame.from_records({"a": [1, 2],
  79. "c": ["l1", "l2"]}, index=["a"]),
  80. DataFrame.from_records({"a": [1.0, 2.0],
  81. "c": ["l1", "l2"]}, index=["a"]),
  82. "DataFrame\\.index are different"),
  83. # MultiIndex
  84. (DataFrame.from_records({"a": [1, 2], "b": [2.1, 1.5],
  85. "c": ["l1", "l2"]}, index=["a", "b"]),
  86. DataFrame.from_records({"a": [1.0, 2.0], "b": [2.1, 1.5],
  87. "c": ["l1", "l2"]}, index=["a", "b"]),
  88. "MultiIndex level \\[0\\] are different")
  89. ])
  90. def test_frame_equal_index_dtype_mismatch(df1, df2, msg, check_index_type):
  91. kwargs = dict(check_index_type=check_index_type)
  92. if check_index_type:
  93. with pytest.raises(AssertionError, match=msg):
  94. assert_frame_equal(df1, df2, **kwargs)
  95. else:
  96. assert_frame_equal(df1, df2, **kwargs)
  97. def test_empty_dtypes(check_dtype):
  98. columns = ["col1", "col2"]
  99. df1 = DataFrame(columns=columns)
  100. df2 = DataFrame(columns=columns)
  101. kwargs = dict(check_dtype=check_dtype)
  102. df1["col1"] = df1["col1"].astype("int64")
  103. if check_dtype:
  104. msg = "Attributes are different"
  105. with pytest.raises(AssertionError, match=msg):
  106. assert_frame_equal(df1, df2, **kwargs)
  107. else:
  108. assert_frame_equal(df1, df2, **kwargs)
  109. def test_frame_equal_index_mismatch():
  110. msg = """DataFrame\\.index are different
  111. DataFrame\\.index values are different \\(33\\.33333 %\\)
  112. \\[left\\]: Index\\(\\[u?'a', u?'b', u?'c'\\], dtype='object'\\)
  113. \\[right\\]: Index\\(\\[u?'a', u?'b', u?'d'\\], dtype='object'\\)"""
  114. df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]},
  115. index=["a", "b", "c"])
  116. df2 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]},
  117. index=["a", "b", "d"])
  118. with pytest.raises(AssertionError, match=msg):
  119. assert_frame_equal(df1, df2)
  120. def test_frame_equal_columns_mismatch():
  121. msg = """DataFrame\\.columns are different
  122. DataFrame\\.columns values are different \\(50\\.0 %\\)
  123. \\[left\\]: Index\\(\\[u?'A', u?'B'\\], dtype='object'\\)
  124. \\[right\\]: Index\\(\\[u?'A', u?'b'\\], dtype='object'\\)"""
  125. df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]},
  126. index=["a", "b", "c"])
  127. df2 = DataFrame({"A": [1, 2, 3], "b": [4, 5, 6]},
  128. index=["a", "b", "c"])
  129. with pytest.raises(AssertionError, match=msg):
  130. assert_frame_equal(df1, df2)
  131. def test_frame_equal_block_mismatch(by_blocks):
  132. msg = """DataFrame\\.iloc\\[:, 1\\] are different
  133. DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
  134. \\[left\\]: \\[4, 5, 6\\]
  135. \\[right\\]: \\[4, 5, 7\\]"""
  136. df1 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]})
  137. df2 = DataFrame({"A": [1, 2, 3], "B": [4, 5, 7]})
  138. with pytest.raises(AssertionError, match=msg):
  139. assert_frame_equal(df1, df2, by_blocks=by_blocks)
  140. @pytest.mark.parametrize("df1,df2,msg", [
  141. (DataFrame({"A": [u"á", u"à", u"ä"], "E": [u"é", u"è", u"ë"]}),
  142. DataFrame({"A": [u"á", u"à", u"ä"], "E": [u"é", u"è", u"e̊"]}),
  143. """DataFrame\\.iloc\\[:, 1\\] are different
  144. DataFrame\\.iloc\\[:, 1\\] values are different \\(33\\.33333 %\\)
  145. \\[left\\]: \\[é, è, ë\\]
  146. \\[right\\]: \\[é, è, e̊\\]"""),
  147. (DataFrame({"A": [u"á", u"à", u"ä"], "E": [u"é", u"è", u"ë"]}),
  148. DataFrame({"A": ["a", "a", "a"], "E": ["e", "e", "e"]}),
  149. """DataFrame\\.iloc\\[:, 0\\] are different
  150. DataFrame\\.iloc\\[:, 0\\] values are different \\(100\\.0 %\\)
  151. \\[left\\]: \\[á, à, ä\\]
  152. \\[right\\]: \\[a, a, a\\]"""),
  153. ])
  154. def test_frame_equal_unicode(df1, df2, msg, by_blocks):
  155. # see gh-20503
  156. #
  157. # Test ensures that `assert_frame_equals` raises the right exception
  158. # when comparing DataFrames containing differing unicode objects.
  159. with pytest.raises(AssertionError, match=msg):
  160. assert_frame_equal(df1, df2, by_blocks=by_blocks)