test_equivalence.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. import pytest
  4. from pandas.compat import lrange, lzip, range
  5. import pandas as pd
  6. from pandas import Index, MultiIndex, Series
  7. import pandas.util.testing as tm
  8. def test_equals(idx):
  9. assert idx.equals(idx)
  10. assert idx.equals(idx.copy())
  11. assert idx.equals(idx.astype(object))
  12. assert not idx.equals(list(idx))
  13. assert not idx.equals(np.array(idx))
  14. same_values = Index(idx, dtype=object)
  15. assert idx.equals(same_values)
  16. assert same_values.equals(idx)
  17. if idx.nlevels == 1:
  18. # do not test MultiIndex
  19. assert not idx.equals(pd.Series(idx))
  20. def test_equals_op(idx):
  21. # GH9947, GH10637
  22. index_a = idx
  23. n = len(index_a)
  24. index_b = index_a[0:-1]
  25. index_c = index_a[0:-1].append(index_a[-2:-1])
  26. index_d = index_a[0:1]
  27. with pytest.raises(ValueError, match="Lengths must match"):
  28. index_a == index_b
  29. expected1 = np.array([True] * n)
  30. expected2 = np.array([True] * (n - 1) + [False])
  31. tm.assert_numpy_array_equal(index_a == index_a, expected1)
  32. tm.assert_numpy_array_equal(index_a == index_c, expected2)
  33. # test comparisons with numpy arrays
  34. array_a = np.array(index_a)
  35. array_b = np.array(index_a[0:-1])
  36. array_c = np.array(index_a[0:-1].append(index_a[-2:-1]))
  37. array_d = np.array(index_a[0:1])
  38. with pytest.raises(ValueError, match="Lengths must match"):
  39. index_a == array_b
  40. tm.assert_numpy_array_equal(index_a == array_a, expected1)
  41. tm.assert_numpy_array_equal(index_a == array_c, expected2)
  42. # test comparisons with Series
  43. series_a = Series(array_a)
  44. series_b = Series(array_b)
  45. series_c = Series(array_c)
  46. series_d = Series(array_d)
  47. with pytest.raises(ValueError, match="Lengths must match"):
  48. index_a == series_b
  49. tm.assert_numpy_array_equal(index_a == series_a, expected1)
  50. tm.assert_numpy_array_equal(index_a == series_c, expected2)
  51. # cases where length is 1 for one of them
  52. with pytest.raises(ValueError, match="Lengths must match"):
  53. index_a == index_d
  54. with pytest.raises(ValueError, match="Lengths must match"):
  55. index_a == series_d
  56. with pytest.raises(ValueError, match="Lengths must match"):
  57. index_a == array_d
  58. msg = "Can only compare identically-labeled Series objects"
  59. with pytest.raises(ValueError, match=msg):
  60. series_a == series_d
  61. with pytest.raises(ValueError, match="Lengths must match"):
  62. series_a == array_d
  63. # comparing with a scalar should broadcast; note that we are excluding
  64. # MultiIndex because in this case each item in the index is a tuple of
  65. # length 2, and therefore is considered an array of length 2 in the
  66. # comparison instead of a scalar
  67. if not isinstance(index_a, MultiIndex):
  68. expected3 = np.array([False] * (len(index_a) - 2) + [True, False])
  69. # assuming the 2nd to last item is unique in the data
  70. item = index_a[-2]
  71. tm.assert_numpy_array_equal(index_a == item, expected3)
  72. tm.assert_series_equal(series_a == item, Series(expected3))
  73. def test_equals_multi(idx):
  74. assert idx.equals(idx)
  75. assert not idx.equals(idx.values)
  76. assert idx.equals(Index(idx.values))
  77. assert idx.equal_levels(idx)
  78. assert not idx.equals(idx[:-1])
  79. assert not idx.equals(idx[-1])
  80. # different number of levels
  81. index = MultiIndex(levels=[Index(lrange(4)), Index(lrange(4)), Index(
  82. lrange(4))], codes=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array(
  83. [0, 1, 0, 0, 0, 1, 0, 1]), np.array([1, 0, 1, 1, 0, 0, 1, 0])])
  84. index2 = MultiIndex(levels=index.levels[:-1], codes=index.codes[:-1])
  85. assert not index.equals(index2)
  86. assert not index.equal_levels(index2)
  87. # levels are different
  88. major_axis = Index(lrange(4))
  89. minor_axis = Index(lrange(2))
  90. major_codes = np.array([0, 0, 1, 2, 2, 3])
  91. minor_codes = np.array([0, 1, 0, 0, 1, 0])
  92. index = MultiIndex(levels=[major_axis, minor_axis],
  93. codes=[major_codes, minor_codes])
  94. assert not idx.equals(index)
  95. assert not idx.equal_levels(index)
  96. # some of the labels are different
  97. major_axis = Index(['foo', 'bar', 'baz', 'qux'])
  98. minor_axis = Index(['one', 'two'])
  99. major_codes = np.array([0, 0, 2, 2, 3, 3])
  100. minor_codes = np.array([0, 1, 0, 1, 0, 1])
  101. index = MultiIndex(levels=[major_axis, minor_axis],
  102. codes=[major_codes, minor_codes])
  103. assert not idx.equals(index)
  104. def test_identical(idx):
  105. mi = idx.copy()
  106. mi2 = idx.copy()
  107. assert mi.identical(mi2)
  108. mi = mi.set_names(['new1', 'new2'])
  109. assert mi.equals(mi2)
  110. assert not mi.identical(mi2)
  111. mi2 = mi2.set_names(['new1', 'new2'])
  112. assert mi.identical(mi2)
  113. mi3 = Index(mi.tolist(), names=mi.names)
  114. mi4 = Index(mi.tolist(), names=mi.names, tupleize_cols=False)
  115. assert mi.identical(mi3)
  116. assert not mi.identical(mi4)
  117. assert mi.equals(mi4)
  118. def test_equals_operator(idx):
  119. # GH9785
  120. assert (idx == idx).all()
  121. def test_equals_missing_values():
  122. # make sure take is not using -1
  123. i = pd.MultiIndex.from_tuples([(0, pd.NaT),
  124. (0, pd.Timestamp('20130101'))])
  125. result = i[0:1].equals(i[0])
  126. assert not result
  127. result = i[1:2].equals(i[1])
  128. assert not result
  129. def test_is_():
  130. mi = MultiIndex.from_tuples(lzip(range(10), range(10)))
  131. assert mi.is_(mi)
  132. assert mi.is_(mi.view())
  133. assert mi.is_(mi.view().view().view().view())
  134. mi2 = mi.view()
  135. # names are metadata, they don't change id
  136. mi2.names = ["A", "B"]
  137. assert mi2.is_(mi)
  138. assert mi.is_(mi2)
  139. assert mi.is_(mi.set_names(["C", "D"]))
  140. mi2 = mi.view()
  141. mi2.set_names(["E", "F"], inplace=True)
  142. assert mi.is_(mi2)
  143. # levels are inherent properties, they change identity
  144. mi3 = mi2.set_levels([lrange(10), lrange(10)])
  145. assert not mi3.is_(mi2)
  146. # shouldn't change
  147. assert mi2.is_(mi)
  148. mi4 = mi3.view()
  149. # GH 17464 - Remove duplicate MultiIndex levels
  150. mi4.set_levels([lrange(10), lrange(10)], inplace=True)
  151. assert not mi4.is_(mi3)
  152. mi5 = mi.view()
  153. mi5.set_levels(mi5.levels, inplace=True)
  154. assert not mi5.is_(mi)
  155. def test_is_all_dates(idx):
  156. assert not idx.is_all_dates
  157. def test_is_numeric(idx):
  158. # MultiIndex is never numeric
  159. assert not idx.is_numeric()
  160. def test_multiindex_compare():
  161. # GH 21149
  162. # Ensure comparison operations for MultiIndex with nlevels == 1
  163. # behave consistently with those for MultiIndex with nlevels > 1
  164. midx = pd.MultiIndex.from_product([[0, 1]])
  165. # Equality self-test: MultiIndex object vs self
  166. expected = pd.Series([True, True])
  167. result = pd.Series(midx == midx)
  168. tm.assert_series_equal(result, expected)
  169. # Greater than comparison: MultiIndex object vs self
  170. expected = pd.Series([False, False])
  171. result = pd.Series(midx > midx)
  172. tm.assert_series_equal(result, expected)